diff --git a/Cargo.lock b/Cargo.lock index ea26a3a5b..d42fb99ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -94,14 +94,14 @@ name = "arch" version = "0.1.0" dependencies = [ "arch_gen", - "kvm-bindings", - "kvm-ioctls", + "kvm-bindings 0.14.0", + "kvm-ioctls 0.24.0", "libc", "smbios", "tdx", "utils", "vm-memory", - "vmm-sys-util 0.14.0", + "vmm-sys-util 0.15.0", ] [[package]] @@ -321,6 +321,17 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "chacha20" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601" +dependencies = [ + "cfg-if", + "cpufeatures 0.3.0", + "rand_core 0.10.0", +] + [[package]] name = "clang-sys" version = "1.8.1" @@ -365,13 +376,22 @@ dependencies = [ "libc", ] +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + [[package]] name = "cpuid" version = "0.1.0" dependencies = [ - "kvm-bindings", - "kvm-ioctls", - "vmm-sys-util 0.14.0", + "kvm-bindings 0.14.0", + "kvm-ioctls 0.24.0", + "vmm-sys-util 0.15.0", ] [[package]] @@ -420,8 +440,8 @@ dependencies = [ "imago", "krun_display", "krun_input", - "kvm-bindings", - "kvm-ioctls", + "kvm-bindings 0.14.0", + "kvm-ioctls 0.24.0", "libc", "libloading", "log", @@ -429,10 +449,11 @@ dependencies = [ "nix 0.30.1", "pipewire", "polly", - "rand", + "rand 0.9.2", "rutabaga_gfx", "thiserror 2.0.18", "utils", + "vhost", "virtio-bindings", "vm-fdt", "vm-memory", @@ -521,6 +542,12 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "foldhash" version = "0.2.0" @@ -633,16 +660,39 @@ checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "libc", - "r-efi", + "r-efi 5.3.0", "wasip2", ] +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "rand_core 0.10.0", + "wasip2", + "wasip3", +] + [[package]] name = "glob" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash 0.1.5", +] + [[package]] name = "hashbrown" version = "0.16.1" @@ -651,7 +701,7 @@ checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" dependencies = [ "allocator-api2", "equivalent", - "foldhash", + "foldhash 0.2.0", ] [[package]] @@ -670,6 +720,12 @@ dependencies = [ "log", ] +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + [[package]] name = "imago" version = "0.2.2" @@ -697,7 +753,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.16.1", + "serde", + "serde_core", ] [[package]] @@ -766,7 +824,7 @@ version = "0.1.34" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" dependencies = [ - "getrandom", + "getrandom 0.3.4", "libc", ] @@ -835,6 +893,15 @@ dependencies = [ "vmm-sys-util 0.14.0", ] +[[package]] +name = "kvm-bindings" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b3c06ff73c7ce03e780887ec2389d62d2a2a9ddf471ab05c2ff69207cd3f3b4" +dependencies = [ + "vmm-sys-util 0.15.0", +] + [[package]] name = "kvm-ioctls" version = "0.22.1" @@ -842,11 +909,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c8f7370330b4f57981e300fa39b02088f2f2a5c2d0f1f994e8090589619c56d" dependencies = [ "bitflags 2.11.0", - "kvm-bindings", + "kvm-bindings 0.12.1", "libc", "vmm-sys-util 0.14.0", ] +[[package]] +name = "kvm-ioctls" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "333f77a20344a448f3f70664918135fddeb804e938f28a99d685bd92926e0b19" +dependencies = [ + "bitflags 2.11.0", + "kvm-bindings 0.14.0", + "libc", + "vmm-sys-util 0.15.0", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -859,6 +938,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + [[package]] name = "libc" version = "0.2.183" @@ -876,15 +961,15 @@ dependencies = [ "hvf", "krun_display", "krun_input", - "kvm-bindings", - "kvm-ioctls", + "kvm-bindings 0.14.0", + "kvm-ioctls 0.24.0", "libc", "libloading", "log", "nitro-enclaves 0.5.0", "once_cell", "polly", - "rand", + "rand 0.9.2", "utils", "vm-memory", "vmm", @@ -967,7 +1052,7 @@ version = "0.16.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"a1dc47f592c06f33f8e3aea9591776ec7c9f9e4124778ff8a3c3b87159f7e593" dependencies = [ - "hashbrown", + "hashbrown 0.16.1", ] [[package]] @@ -1019,7 +1104,7 @@ dependencies = [ "bitflags 2.11.0", "libc", "nix 0.26.4", - "rand", + "rand 0.9.2", "vsock", ] @@ -1201,6 +1286,16 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + [[package]] name = "proc-macro2" version = "1.0.106" @@ -1225,6 +1320,12 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + [[package]] name = "rand" version = "0.9.2" @@ -1232,7 +1333,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ "rand_chacha", - "rand_core", + "rand_core 0.9.5", +] + +[[package]] +name = "rand" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc266eb313df6c5c09c1c7b1fbe2510961e5bcd3add930c1e31f7ed9da0feff8" +dependencies = [ + "chacha20", + "getrandom 0.4.2", + "rand_core 0.10.0", ] [[package]] @@ -1242,7 +1354,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.9.5", ] [[package]] @@ -1251,9 +1363,15 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" dependencies = [ 
- "getrandom", + "getrandom 0.3.4", ] +[[package]] +name = "rand_core" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c8d0fd677905edcbeedbf2edb6494d676f0e98d54d5cf9bda0b061cb8fb8aba" + [[package]] name = "redox_syscall" version = "0.7.3" @@ -1355,7 +1473,7 @@ dependencies = [ "pkg-config", "remain", "thiserror 1.0.69", - "vmm-sys-util 0.14.0", + "vmm-sys-util 0.15.0", "winapi", "zerocopy", ] @@ -1425,7 +1543,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ "cfg-if", - "cpufeatures", + "cpufeatures 0.2.17", "digest", ] @@ -1565,8 +1683,8 @@ checksum = "ad59e5bf374211a1fdd8e7439a07d5a5e617fe97f5cf21d03bcd1bf8c82b73af" dependencies = [ "bitflags 2.11.0", "iocuddle", - "kvm-bindings", - "kvm-ioctls", + "kvm-bindings 0.12.1", + "kvm-ioctls 0.22.1", "libc", "uuid", "vmm-sys-util 0.12.1", @@ -1710,6 +1828,12 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + [[package]] name = "unty" version = "0.0.4" @@ -1728,11 +1852,11 @@ version = "0.1.0" dependencies = [ "bitflags 1.3.2", "crossbeam-channel", - "kvm-bindings", + "kvm-bindings 0.14.0", "libc", "log", "nix 0.30.1", - "vmm-sys-util 0.14.0", + "vmm-sys-util 0.15.0", ] [[package]] @@ -1741,7 +1865,9 @@ version = "1.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37" dependencies = [ + "getrandom 0.4.2", "js-sys", + "rand 0.10.0", "wasm-bindgen", ] @@ -1757,6 +1883,19 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "vhost" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee90657203a8644e9a0860a0db6a7887d8ef0c7bc09fc22dfa4ae75df65bac86" +dependencies = [ + "bitflags 2.11.0", + "libc", + "uuid", + "vm-memory", + "vmm-sys-util 0.15.0", +] + [[package]] name = "virtio-bindings" version = "0.2.7" @@ -1805,8 +1944,8 @@ dependencies = [ "kernel", "krun_display", "krun_input", - "kvm-bindings", - "kvm-ioctls", + "kvm-bindings 0.14.0", + "kvm-ioctls 0.24.0", "libc", "linux-loader", "log", @@ -1817,7 +1956,7 @@ dependencies = [ "tdx", "utils", "vm-memory", - "vmm-sys-util 0.14.0", + "vmm-sys-util 0.15.0", "zstd", ] @@ -1841,6 +1980,16 @@ dependencies = [ "libc", ] +[[package]] +name = "vmm-sys-util" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "506c62fdf617a5176827c2f9afbcf1be155b03a9b4bf9617a60dbc07e3a1642f" +dependencies = [ + "bitflags 1.3.2", + "libc", +] + [[package]] name = "vsock" version = "0.5.3" @@ -1860,6 +2009,15 @@ dependencies = [ "wit-bindgen", ] +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen", +] + [[package]] name = "wasm-bindgen" version = "0.2.114" @@ -1905,6 +2063,40 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" 
+dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags 2.11.0", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + [[package]] name = "winapi" version = "0.3.9" @@ -1956,6 +2148,88 @@ name = "wit-bindgen" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags 2.11.0", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", 
+ "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] [[package]] name = "xattr" diff --git a/Makefile b/Makefile index 793bee438..011c78a08 100644 --- a/Makefile +++ b/Makefile @@ -50,6 +50,9 @@ endif ifeq ($(INPUT),1) FEATURE_FLAGS += --features input endif +ifeq ($(VHOST_USER),1) + FEATURE_FLAGS += --features vhost-user +endif ifeq ($(AWS_NITRO),1) VARIANT = -awsnitro FEATURE_FLAGS := --features aws-nitro,net diff --git a/examples/chroot_vm.c b/examples/chroot_vm.c index 4c25dabd3..4b0393e1a 100644 --- a/examples/chroot_vm.c +++ b/examples/chroot_vm.c @@ -38,7 +38,10 @@ static void print_help(char *const name) " --log=PATH Write libkrun log to file or named pipe at PATH\n" " --color-log=PATH Write libkrun log to file or named pipe at PATH, use color\n" " --net=NET_MODE Set network mode\n" - " --passt-socket=PATH Instead of starting passt, connect to passt socket at PATH" + " --passt-socket=PATH Instead of starting passt, connect to passt socket at PATH\n" + " --vhost-user-rng=PATH Use vhost-user RNG backend at socket PATH\n" + " --vhost-user-snd=PATH Use vhost-user sound backend at socket PATH\n" + " --vhost-user-vsock=PATH Use vhost-user vsock backend at socket PATH\n" "NET_MODE can be either TSI (default) or PASST\n" "\n" "NEWROOT: the root directory of the vm\n" @@ -48,12 +51,25 @@ static void print_help(char *const name) ); } +static bool check_krun_error(int err, const char *msg) +{ + if (err) { + errno = -err; + perror(msg); + return false; + } + return true; +} + static const struct option long_options[] = { { "help", no_argument, NULL, 'h' }, { "log", required_argument, NULL, 'L' }, { "color-log", required_argument, 
NULL, 'C' }, { "net_mode", required_argument, NULL, 'N' }, { "passt-socket", required_argument, NULL, 'P' }, + { "vhost-user-rng", required_argument, NULL, 'V' }, + { "vhost-user-snd", required_argument, NULL, 'S' }, + { "vhost-user-vsock", required_argument, NULL, 'K' }, { NULL, 0, NULL, 0 } }; @@ -63,6 +79,9 @@ struct cmdline { uint32_t log_style; enum net_mode net_mode; char const *passt_socket_path; + char const *vhost_user_rng_socket; + char const *vhost_user_snd_socket; + char const *vhost_user_vsock_socket; char const *new_root; char *const *guest_argv; }; @@ -89,6 +108,9 @@ bool parse_cmdline(int argc, char *const argv[], struct cmdline *cmdline) .show_help = false, .net_mode = NET_MODE_TSI, .passt_socket_path = NULL, + .vhost_user_rng_socket = NULL, + .vhost_user_snd_socket = NULL, + .vhost_user_vsock_socket = NULL, .new_root = NULL, .guest_argv = NULL, .log_target = KRUN_LOG_TARGET_DEFAULT, @@ -124,6 +146,15 @@ bool parse_cmdline(int argc, char *const argv[], struct cmdline *cmdline) case 'P': cmdline->passt_socket_path = optarg; break; + case 'V': + cmdline->vhost_user_rng_socket = optarg; + break; + case 'S': + cmdline->vhost_user_snd_socket = optarg; + break; + case 'K': + cmdline->vhost_user_vsock_socket = optarg; + break; case '?': return false; default: @@ -249,6 +280,49 @@ int main(int argc, char *const argv[]) return -1; } + // Configure vhost-user RNG if requested + if (cmdline.vhost_user_rng_socket != NULL) { + // Test sentinel-terminated array: auto-detect queue count, use custom size + uint16_t custom_sizes[] = {512, 0}; // 0 = sentinel terminator + + if (!check_krun_error(krun_add_vhost_user_device(ctx_id, KRUN_VIRTIO_DEVICE_RNG, + cmdline.vhost_user_rng_socket, NULL, 0, custom_sizes), + "Error adding vhost-user RNG device")) { + return -1; + } + printf("Using vhost-user RNG backend at %s (custom queue size: 512)\n", cmdline.vhost_user_rng_socket); + } + + // Configure vhost-user sound if requested + if (cmdline.vhost_user_snd_socket != NULL) 
{ + if (!check_krun_error(krun_add_vhost_user_device(ctx_id, KRUN_VIRTIO_DEVICE_SND, + cmdline.vhost_user_snd_socket, NULL, + KRUN_VHOST_USER_SND_NUM_QUEUES, + KRUN_VHOST_USER_SND_QUEUE_SIZES), + "Error adding vhost-user sound device")) { + return -1; + } + printf("Using vhost-user sound backend at %s\n", cmdline.vhost_user_snd_socket); + } + + // Configure vhost-user vsock if requested + if (cmdline.vhost_user_vsock_socket != NULL) { + // Disable the implicit vsock device to avoid conflict + if (!check_krun_error(krun_disable_implicit_vsock(ctx_id), + "Error disabling implicit vsock")) { + return -1; + } + + if (!check_krun_error(krun_add_vhost_user_device(ctx_id, KRUN_VIRTIO_DEVICE_VSOCK, + cmdline.vhost_user_vsock_socket, NULL, + KRUN_VHOST_USER_VSOCK_NUM_QUEUES, + KRUN_VHOST_USER_VSOCK_QUEUE_SIZES), + "Error adding vhost-user vsock device")) { + return -1; + } + printf("Using vhost-user vsock backend at %s\n", cmdline.vhost_user_vsock_socket); + } + // Raise RLIMIT_NOFILE to the maximum allowed to create some room for virtio-fs getrlimit(RLIMIT_NOFILE, &rlim); rlim.rlim_cur = rlim.rlim_max; @@ -269,7 +343,8 @@ int main(int argc, char *const argv[]) } // Map port 18000 in the host to 8000 in the guest (if networking uses TSI) - if (cmdline.net_mode == NET_MODE_TSI) { + // Skip port mapping when using vhost-user-vsock (TSI requires built-in vsock) + if (cmdline.net_mode == NET_MODE_TSI && cmdline.vhost_user_vsock_socket == NULL) { if (err = krun_set_port_map(ctx_id, &port_map[0])) { errno = -err; perror("Error configuring port map"); diff --git a/include/libkrun.h b/include/libkrun.h index 325c59eed..87f555c64 100644 --- a/include/libkrun.h +++ b/include/libkrun.h @@ -717,6 +717,91 @@ int krun_add_input_device_fd(uint32_t ctx_id, int input_fd); */ int32_t krun_set_snd_device(uint32_t ctx_id, bool enable); +/** + * Vhost-user device types. + * These correspond to virtio device type IDs for devices. 
+ */ +#define KRUN_VIRTIO_DEVICE_RNG 4 +#define KRUN_VIRTIO_DEVICE_VSOCK 19 +#define KRUN_VIRTIO_DEVICE_SND 25 +#define KRUN_VIRTIO_DEVICE_CAN 36 + +/** + * Vhost-user RNG device default queue configuration. + * Use these when you want explicit defaults instead of auto-detection. + */ +#define KRUN_VHOST_USER_RNG_NUM_QUEUES 1 +#define KRUN_VHOST_USER_RNG_QUEUE_SIZES ((uint16_t[]){256}) + +/** + * Vhost-user sound device default queue configuration. + * Sound device uses 4 queues: control (idx 0), event (idx 1), TX/playback (idx 2), RX/capture (idx 3). + */ +#define KRUN_VHOST_USER_SND_NUM_QUEUES 4 +#define KRUN_VHOST_USER_SND_QUEUE_SIZES ((uint16_t[]){64, 64, 64, 64}) + +/** + * Vhost-user vsock device default queue configuration. + * Vsock device uses 3 queues: RX (idx 0), TX (idx 1), event (idx 2). + */ +#define KRUN_VHOST_USER_VSOCK_NUM_QUEUES 3 +#define KRUN_VHOST_USER_VSOCK_QUEUE_SIZES ((uint16_t[]){128, 128, 128}) + +/** + * Add a vhost-user device to the VM. + * + * This function adds a vhost-user device by connecting to an external + * backend process (e.g., vhost-device-rng, vhost-device-snd). The backend + * must be running and listening on the specified socket before starting the VM. + * + * This API is designed for devices like RNG, sound, and CAN. + * + * Arguments: + * "ctx_id" - the configuration context ID. + * "device_type" - type of vhost-user device (e.g., KRUN_VIRTIO_DEVICE_RNG). + * "socket_path" - path to the vhost-user Unix domain socket (e.g., "/tmp/vhost-rng.sock"). + * "name" - device name for logging/debugging (e.g., "vhost-rng", "vhost-snd"). + * NULL = auto-generate from device_type ("vhost-user-4", "vhost-user-25", etc.) + * "num_queues" - number of virtqueues. + * 0 = auto-detect from backend (requires backend MQ support). + * >0 = explicit queue count. + * Or use device-specific constants like KRUN_VHOST_USER_RNG_NUM_QUEUES. + * "queue_sizes" - array of queue sizes for each queue. + * NULL = use default size (256) for all queues.
+ * When num_queues=0 (auto-detect): array must be 0-terminated (sentinel). + * When num_queues>0 (explicit): array must have exactly num_queues elements. + * Use device-specific constants like KRUN_VHOST_USER_RNG_QUEUE_SIZES for defaults. + * + * Examples: + * // Auto-detect queue count, use default size (256) + * krun_add_vhost_user_device(ctx, KRUN_VIRTIO_DEVICE_RNG, "/tmp/rng.sock", NULL, 0, NULL); + * + * // Auto-detect queue count, use custom size (512) for all queues + * uint16_t custom_size[] = {512, 0}; // 0 = sentinel terminator + * krun_add_vhost_user_device(ctx, KRUN_VIRTIO_DEVICE_RNG, "/tmp/rng.sock", NULL, 0, custom_size); + * + * // Explicit defaults using #define constants + * krun_add_vhost_user_device(ctx, KRUN_VIRTIO_DEVICE_RNG, "/tmp/rng.sock", "vhost-rng", + * KRUN_VHOST_USER_RNG_NUM_QUEUES, + * KRUN_VHOST_USER_RNG_QUEUE_SIZES); + * + * // Explicit queue count with custom sizes + * uint16_t sizes[] = {256, 512}; + * krun_add_vhost_user_device(ctx, KRUN_VIRTIO_DEVICE_SND, "/tmp/snd.sock", "vhost-snd", 2, sizes); + * + * Returns: + * Zero on success or a negative error number on failure. + * -EINVAL - Invalid parameters (device_type, socket_path, etc.) + * -ENOENT - Context doesn't exist + * -ENOTSUP - vhost-user support not compiled in + */ +int32_t krun_add_vhost_user_device(uint32_t ctx_id, + uint32_t device_type, + const char *socket_path, + const char *name, + uint16_t num_queues, + const uint16_t *queue_sizes); + /** * Configures a map of rlimits to be set in the guest before starting the isolated binary.
* diff --git a/src/arch/Cargo.toml b/src/arch/Cargo.toml index 2b39cfa80..b4e77e452 100644 --- a/src/arch/Cargo.toml +++ b/src/arch/Cargo.toml @@ -13,15 +13,15 @@ efi = [] [dependencies] libc = ">=0.2.39" vm-memory = { version = "0.17", features = ["backend-mmap"] } -vmm-sys-util = "0.14" +vmm-sys-util = "0.15" arch_gen = { path = "../arch_gen" } smbios = { path = "../smbios" } utils = { path = "../utils" } [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = "0.12", features = ["fam-wrappers"] } -kvm-ioctls = "0.22" +kvm-bindings = { version = "0.14", features = ["fam-wrappers"] } +kvm-ioctls = "0.24" tdx = { version = "0.1.0", optional = true } [dev-dependencies] diff --git a/src/cpuid/Cargo.toml b/src/cpuid/Cargo.toml index b80f24c6f..ee4ee6e2f 100644 --- a/src/cpuid/Cargo.toml +++ b/src/cpuid/Cargo.toml @@ -8,8 +8,8 @@ edition = "2021" tdx = [] [dependencies] -vmm-sys-util = "0.14" +vmm-sys-util = "0.15" [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = "0.12", features = ["fam-wrappers"] } -kvm-ioctls = "0.22" +kvm-bindings = { version = "0.14", features = ["fam-wrappers"] } +kvm-ioctls = "0.24" diff --git a/src/devices/Cargo.toml b/src/devices/Cargo.toml index 94a331202..b7c5a0687 100644 --- a/src/devices/Cargo.toml +++ b/src/devices/Cargo.toml @@ -18,6 +18,7 @@ input = ["zerocopy", "krun_input"] virgl_resource_map2 = [] aws-nitro = [] test_utils = [] +vhost-user = ["vhost"] [dependencies] bitflags = "1.2.0" @@ -29,6 +30,7 @@ nix = { version = "0.30.1", features = ["ioctl", "net", "poll", "socket", "fs"] pw = { package = "pipewire", version = "0.8.0", optional = true } rand = "0.9.2" thiserror = { version = "2.0", optional = true } +vhost = { version = "0.16", optional = true, features = ["vhost-user-frontend"] } virtio-bindings = "0.2.0" vm-memory = { version = "0.17", features = ["backend-mmap"] } zerocopy = { version = "0.8.26", optional = true, features = ["derive"] } @@ -48,8 +50,8 @@ lru = ">=0.9" 
[target.'cfg(target_os = "linux")'.dependencies] rutabaga_gfx = { path = "../rutabaga_gfx", features = ["x"], optional = true } caps = "0.5.5" -kvm-bindings = { version = "0.12", features = ["fam-wrappers"] } -kvm-ioctls = "0.22" +kvm-bindings = { version = "0.14", features = ["fam-wrappers"] } +kvm-ioctls = "0.24" [target.'cfg(any(target_arch = "aarch64", target_arch = "riscv64"))'.dependencies] vm-fdt = ">= 0.2.0" diff --git a/src/devices/src/virtio/mod.rs b/src/devices/src/virtio/mod.rs index 384aef5ac..64f19c35b 100644 --- a/src/devices/src/virtio/mod.rs +++ b/src/devices/src/virtio/mod.rs @@ -36,6 +36,8 @@ mod queue; pub mod rng; #[cfg(feature = "snd")] pub mod snd; +#[cfg(feature = "vhost-user")] +pub mod vhost_user; pub mod vsock; #[cfg(not(feature = "tee"))] @@ -56,6 +58,8 @@ pub use self::queue::{Descriptor, DescriptorChain, Queue}; pub use self::rng::*; #[cfg(feature = "snd")] pub use self::snd::Snd; +#[cfg(feature = "vhost-user")] +pub use self::vhost_user::VhostUserDevice; pub use self::vsock::*; /// When the driver initializes the device, it lets the device know about the diff --git a/src/devices/src/virtio/vhost_user/device.rs b/src/devices/src/virtio/vhost_user/device.rs new file mode 100644 index 000000000..e1961c0a6 --- /dev/null +++ b/src/devices/src/virtio/vhost_user/device.rs @@ -0,0 +1,571 @@ +// Copyright 2026, Red Hat Inc. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Generic vhost-user device wrapper. +//! +//! This module provides a wrapper around the vhost crate's Frontend, +//! adapting it to work with libkrun's VirtioDevice trait. 
+ +use std::io::{self, ErrorKind, Result as IoResult}; +use std::os::fd::AsRawFd; +use std::os::unix::net::UnixStream; +use std::sync::{Arc, Mutex}; + +use log::{debug, error, warn}; +use polly::event_manager::{EventManager, Subscriber}; +use utils::epoll::{EpollEvent, EventSet}; +use utils::eventfd::{EventFd, EFD_NONBLOCK}; +use vhost::vhost_user::message::VhostUserConfigFlags; +use vhost::vhost_user::{Frontend, VhostUserFrontend, VhostUserProtocolFeatures}; +use vhost::{VhostBackend, VhostUserMemoryRegionInfo, VringConfigData}; +use vm_memory::{Address, GuestMemory, GuestMemoryMmap, GuestMemoryRegion}; + +use crate::virtio::{ + ActivateError, ActivateResult, DeviceQueue, DeviceState, InterruptTransport, QueueConfig, + VirtioDevice, +}; + +/// VHOST_USER_F_PROTOCOL_FEATURES (bit 30) is a backend-only feature +/// that enables vhost-user protocol extensions. It's not a virtio feature. +const VHOST_USER_F_PROTOCOL_FEATURES: u64 = 1 << 30; + +/// Generic vhost-user device wrapper. +/// +/// This wraps a vhost-user backend connection and implements the VirtioDevice +/// trait, allowing it to be used like any other virtio device in libkrun. +pub struct VhostUserDevice { + /// Vhost-user frontend connection + frontend: Arc<Mutex<Frontend>>, + + /// Device type (e.g., VIRTIO_ID_RNG = 4) + device_type: u32, + + /// Device name for logging + device_name: String, + + /// Queue configurations + queue_configs: Vec<QueueConfig>, + + /// Available features from the backend + avail_features: u64, + + /// Whether the backend supports protocol features + has_protocol_features: bool, + + /// Acknowledged features + acked_features: u64, + + /// Device state + device_state: DeviceState, + + /// Activation event (registered with EventManager) + activate_evt: EventFd, + + /// Vring call event (backend->VMM interrupt notification) + vring_call_event: Option<EventFd>, +} + +impl VhostUserDevice { + /// Create a new vhost-user device by connecting to a socket.
+ /// + /// # Arguments + /// + /// * `socket_path` - Path to the vhost-user Unix domain socket + /// * `device_type` - Virtio device type ID + /// * `device_name` - Human-readable device name for logging + /// * `num_queues` - Number of queues (0 = query backend via MQ protocol) + /// * `queue_sizes` - Size for each queue (empty = use default 256) + /// + /// # Returns + /// + /// A new VhostUserDevice or an error if connection fails. + pub fn new( + socket_path: &str, + device_type: u32, + device_name: String, + num_queues: u16, + queue_sizes: &[u16], + ) -> IoResult<Self> { + debug!("Connecting to vhost-user backend at {}", socket_path); + + // Connect to the vhost-user backend + let stream = UnixStream::connect(socket_path)?; + // NOTE: `num_queues` could be 0 here, but this is actually fine + // because if `VhostUserProtocolFeatures::MQ` is supported the negotiated + // value will be used automatically by Frontend + let mut frontend = Frontend::from_stream(stream, num_queues as u64); + + // Get available features from backend + let avail_features = frontend + .get_features() + .map_err(|e| io::Error::new(ErrorKind::Other, e))?; + + debug!("{}: backend features: 0x{:x}", device_name, avail_features); + + // Strip the vhost specific bit to leave only standard virtio features + let has_protocol_features = avail_features & VHOST_USER_F_PROTOCOL_FEATURES != 0; + let avail_features = avail_features & !VHOST_USER_F_PROTOCOL_FEATURES; + + if has_protocol_features { + let protocol_features = frontend + .get_protocol_features() + .map_err(|e| io::Error::new(ErrorKind::Other, e))?; + + let mut our_protocol_features = VhostUserProtocolFeatures::empty(); + if protocol_features.contains(VhostUserProtocolFeatures::CONFIG) { + our_protocol_features |= VhostUserProtocolFeatures::CONFIG; + } + if protocol_features.contains(VhostUserProtocolFeatures::MQ) { + our_protocol_features |= VhostUserProtocolFeatures::MQ; + } + + frontend + .set_protocol_features(our_protocol_features) +
.map_err(|e| io::Error::new(ErrorKind::Other, e))?; + } + + // Determine actual queue count - may require protocol feature negotiation + let actual_num_queues = if num_queues == 0 { + if has_protocol_features { + let backend_queue_num = frontend + .get_queue_num() + .map_err(|e| io::Error::new(ErrorKind::Other, e))?; + + debug!( + "{}: backend reports {} queues available", + device_name, backend_queue_num + ); + + backend_queue_num as usize + } else { + return Err(io::Error::new( + ErrorKind::InvalidInput, + "Backend doesn't support protocol features, must specify queue count", + )); + } + } else { + num_queues as usize + }; + + let default_size = queue_sizes.last().copied().unwrap_or(256); + let queue_configs: Vec<_> = (0..actual_num_queues) + .map(|i| { + let size = queue_sizes.get(i).copied().unwrap_or(default_size); + QueueConfig::new(size) + }) + .collect(); + + Ok(VhostUserDevice { + frontend: Arc::new(Mutex::new(frontend)), + device_type, + device_name, + queue_configs, + avail_features, + has_protocol_features, + acked_features: 0, + device_state: DeviceState::Inactive, + activate_evt: EventFd::new(EFD_NONBLOCK)?, + vring_call_event: None, + }) + } + + /// Activate the vhost-user device by setting up memory and vrings. 
+    fn activate_vhost_user(
+        &mut self,
+        mem: &GuestMemoryMmap,
+        queues: &[DeviceQueue],
+    ) -> IoResult<()> {
+        let mut frontend = self.frontend.lock().unwrap();
+
+        debug!("{}: activating vhost-user device", self.device_name);
+
+        // Combine guest-acked features with backend-only features (QEMU approach)
+        let backend_feature_bits = if self.has_protocol_features {
+            self.acked_features | VHOST_USER_F_PROTOCOL_FEATURES
+        } else {
+            self.acked_features
+        };
+
+        frontend
+            .set_owner()
+            .map_err(|e| io::Error::new(ErrorKind::Other, e))?;
+
+        // Only share memory regions that have file backing (memfd)
+        let regions: Vec<VhostUserMemoryRegionInfo> = mem
+            .iter()
+            .filter_map(|region| {
+                if region.file_offset().is_some() {
+                    Some(VhostUserMemoryRegionInfo::from_guest_region(region))
+                } else {
+                    None
+                }
+            })
+            .collect::<Result<Vec<_>, _>>()
+            .map_err(|e| {
+                error!(
+                    "{}: failed to convert memory regions: {:?}",
+                    self.device_name, e
+                );
+                io::Error::new(ErrorKind::Other, e)
+            })?;
+
+        debug!(
+            "{}: sharing {} file-backed regions with backend",
+            self.device_name,
+            regions.len()
+        );
+
+        frontend.set_mem_table(&regions).map_err(|e| {
+            error!("{}: set_mem_table failed: {:?}", self.device_name, e);
+            io::Error::new(ErrorKind::Other, e)
+        })?;
+
+        // If protocol features not negotiated, this triggers automatic ring enabling
+        frontend
+            .set_features(backend_feature_bits)
+            .map_err(|e| io::Error::new(ErrorKind::Other, e))?;
+
+        let vring_call_event = EventFd::new(EFD_NONBLOCK)?;
+
+        for (queue_index, device_queue) in queues.iter().enumerate() {
+            let queue = &device_queue.queue;
+
+            frontend
+                .set_vring_num(queue_index, queue.actual_size())
+                .map_err(|e| io::Error::new(ErrorKind::Other, e))?;
+
+            // Set vring base
+            frontend
+                .set_vring_base(queue_index, 0)
+                .map_err(|e| io::Error::new(ErrorKind::Other, e))?;
+
+            // Vring addresses in queue are GPAs, but vhost-user protocol expects VMM VAs
+            let desc_table_gpa = queue.desc_table.0;
+            let avail_ring_gpa = queue.avail_ring.0;
+            let used_ring_gpa = queue.used_ring.0;
+
+            let desc_table_vmm =
+                mem.get_host_address(Address::new(desc_table_gpa))
+                    .map_err(|_| {
+                        io::Error::new(
+                            ErrorKind::InvalidInput,
+                            format!("GPA 0x{:x} not found in any memory region", desc_table_gpa),
+                        )
+                    })? as u64;
+            let avail_ring_vmm =
+                mem.get_host_address(Address::new(avail_ring_gpa))
+                    .map_err(|_| {
+                        io::Error::new(
+                            ErrorKind::InvalidInput,
+                            format!("GPA 0x{:x} not found in any memory region", avail_ring_gpa),
+                        )
+                    })? as u64;
+            let used_ring_vmm = mem
+                .get_host_address(Address::new(used_ring_gpa))
+                .map_err(|_| {
+                    io::Error::new(
+                        ErrorKind::InvalidInput,
+                        format!("GPA 0x{:x} not found in any memory region", used_ring_gpa),
+                    )
+                })? as u64;
+
+            let vring_config = VringConfigData {
+                flags: 0,
+                queue_max_size: queue.get_max_size(),
+                queue_size: queue.actual_size(),
+                desc_table_addr: desc_table_vmm,
+                used_ring_addr: used_ring_vmm,
+                avail_ring_addr: avail_ring_vmm,
+                log_addr: None,
+            };
+
+            frontend
+                .set_vring_addr(queue_index, &vring_config)
+                .map_err(|e| {
+                    error!("{}: set_vring_addr failed: {:?}", self.device_name, e);
+                    io::Error::new(ErrorKind::Other, e)
+                })?;
+
+            frontend
+                .set_vring_kick(queue_index, &device_queue.event)
+                .map_err(|e| {
+                    error!("{}: set_vring_kick failed: {:?}", self.device_name, e);
+                    io::Error::new(ErrorKind::Other, e)
+                })?;
+
+            frontend
+                .set_vring_call(queue_index, &vring_call_event)
+                .map_err(|e| {
+                    error!("{}: set_vring_call failed: {:?}", self.device_name, e);
+                    io::Error::new(ErrorKind::Other, e)
+                })?;
+
+            // Per QEMU vhost.c: when VHOST_USER_F_PROTOCOL_FEATURES is not negotiated,
+            // the rings start directly in the enabled state, and set_vring_enable will fail.
+            if self.has_protocol_features {
+                frontend
+                    .set_vring_enable(queue_index, true)
+                    .map_err(|e| io::Error::new(ErrorKind::Other, e))?;
+            } else {
+                debug!(
+                    "{}: vring {} already enabled (protocol features not negotiated)",
+                    self.device_name, queue_index
+                );
+            }
+        }
+
+        self.vring_call_event = Some(vring_call_event);
+
+        debug!(
+            "{}: vhost-user device activated successfully",
+            self.device_name
+        );
+
+        Ok(())
+    }
+}
+
+impl VirtioDevice for VhostUserDevice {
+    fn device_type(&self) -> u32 {
+        self.device_type
+    }
+
+    fn device_name(&self) -> &str {
+        &self.device_name
+    }
+
+    fn queue_config(&self) -> &[QueueConfig] {
+        &self.queue_configs
+    }
+
+    fn avail_features(&self) -> u64 {
+        self.avail_features
+    }
+
+    fn acked_features(&self) -> u64 {
+        self.acked_features
+    }
+
+    fn set_acked_features(&mut self, acked_features: u64) {
+        self.acked_features = acked_features;
+    }
+
+    fn read_config(&self, offset: u64, data: &mut [u8]) {
+        // Fetch config from backend on every read (same as QEMU/crosvm)
+        // No caching to avoid invalidation issues
+        if self.has_protocol_features {
+            if let Ok(mut frontend) = self.frontend.lock() {
+                match frontend.get_config(
+                    offset as u32,
+                    data.len() as u32,
+                    VhostUserConfigFlags::empty(),
+                    data,
+                ) {
+                    Ok((_, returned_buf)) => {
+                        if data.len() <= returned_buf.len() {
+                            data.copy_from_slice(&returned_buf[..data.len()]);
+                            debug!(
+                                "{}: read {} bytes from config at offset {}",
+                                self.device_name,
+                                data.len(),
+                                offset
+                            );
+                            return;
+                        }
+                    }
+                    Err(e) => {
+                        debug!(
+                            "{}: failed to read config from backend: {:?}",
+                            self.device_name, e
+                        );
+                    }
+                }
+            }
+        }
+
+        debug!(
+            "{}: config read at offset {} returning zeros (backend not available)",
+            self.device_name, offset
+        );
+        data.fill(0);
+    }
+
+    fn write_config(&mut self, offset: u64, _data: &[u8]) {
+        // For now, configuration space writes are not supported
+        // This can be extended using VHOST_USER_SET_CONFIG
+        debug!(
+            "{}: config write at offset {} (not yet implemented)",
+            self.device_name, offset
+        );
+    }
+
+    fn activate(
+        &mut self,
+        mem: GuestMemoryMmap,
+        interrupt: InterruptTransport,
+        queues: Vec<DeviceQueue>,
+    ) -> ActivateResult {
+        if let Err(e) = self.activate_vhost_user(&mem, &queues) {
+            error!(
+                "{}: failed to activate vhost-user device: {}",
+                self.device_name, e
+            );
+            return Err(ActivateError::BadActivate);
+        }
+
+        self.device_state = DeviceState::Activated(mem, interrupt);
+
+        if let Err(e) = self.activate_evt.write(1) {
+            error!(
+                "{}: failed to write activate event: {}",
+                self.device_name, e
+            );
+            // Activation is being reported as failed, so do not leave the device
+            // looking activated to is_activated() and the event handlers.
+            self.vring_call_event = None;
+            self.device_state = DeviceState::Inactive;
+            return Err(ActivateError::BadActivate);
+        }
+
+        Ok(())
+    }
+
+    fn is_activated(&self) -> bool {
+        matches!(self.device_state, DeviceState::Activated(_, _))
+    }
+
+    fn reset(&mut self) -> bool {
+        debug!("{}: resetting vhost-user device", self.device_name);
+
+        // Disable all vrings. This is only possible when protocol features were
+        // negotiated; without them set_vring_enable is expected to fail (see the
+        // matching check in activate_vhost_user), so skip it entirely.
+        if self.has_protocol_features {
+            if let Ok(mut frontend) = self.frontend.lock() {
+                for queue_index in 0..self.queue_configs.len() {
+                    if let Err(e) = frontend.set_vring_enable(queue_index, false) {
+                        debug!(
+                            "{}: failed to disable vring {} during reset: {}",
+                            self.device_name, queue_index, e
+                        );
+                    }
+                }
+            }
+        }
+
+        self.vring_call_event = None;
+        self.device_state = DeviceState::Inactive;
+        true
+    }
+}
+
+impl VhostUserDevice {
+    fn handle_vring_call_event(&mut self, event: &EpollEvent) {
+        debug!("{}: vring call event received", self.device_name);
+
+        let event_set = event.event_set();
+        if event_set != EventSet::IN {
+            warn!(
+                "{}: vring call unexpected event {event_set:?}",
+                self.device_name
+            );
+            return;
+        }
+
+        if let Some(ref vring_call_event) = self.vring_call_event {
+            if let Err(e) = vring_call_event.read() {
+                error!(
+                    "{}: failed to read vring_call_event: {}",
+                    self.device_name, e
+                );
+                return;
+            }
+        } else {
+            error!("{}: vring_call_event is None", self.device_name);
+            return;
+        }
+
+        if let DeviceState::Activated(_, ref interrupt) = self.device_state {
+            debug!(
+                "{}: interrupt received from backend, signaling guest",
+                self.device_name
+            );
+            interrupt.signal_used_queue();
+        }
+    }
+
+    fn handle_activate_event(&mut self, event_manager: &mut EventManager) {
+        debug!("{}: activate event", self.device_name);
+
+        if let Err(e) = self.activate_evt.read() {
+            error!(
+                "{}: failed to consume activate event: {}",
+                self.device_name, e
+            );
+        }
+
+        if let Some(ref vring_call_event) = self.vring_call_event {
+            let self_subscriber = event_manager
+                .subscriber(self.activate_evt.as_raw_fd())
+                .unwrap();
+
+            event_manager
+                .register(
+                    vring_call_event.as_raw_fd(),
+                    EpollEvent::new(EventSet::IN, vring_call_event.as_raw_fd() as u64),
+                    self_subscriber.clone(),
+                )
+                .unwrap_or_else(|e| {
+                    error!(
+                        "{}: failed to register vring_call_event with event manager: {e:?}",
+                        self.device_name
+                    );
+                });
+        } else {
+            error!(
+                "{}: vring_call_event is None during activation",
+                self.device_name
+            );
+        }
+
+        // Unregister activate_evt as it's only needed once
+        event_manager
+            .unregister(self.activate_evt.as_raw_fd())
+            .unwrap_or_else(|e| {
+                error!(
+                    "{}: failed to unregister activate event: {e:?}",
+                    self.device_name
+                );
+            });
+    }
+}
+
+impl Subscriber for VhostUserDevice {
+    fn process(&mut self, event: &EpollEvent, event_manager: &mut EventManager) {
+        let source = event.fd();
+        let activate_evt_fd = self.activate_evt.as_raw_fd();
+        let vring_call_fd = self
+            .vring_call_event
+            .as_ref()
+            .map(|e| e.as_raw_fd())
+            .unwrap_or(-1);
+
+        if self.is_activated() {
+            match source {
+                _ if source == vring_call_fd => self.handle_vring_call_event(event),
+                _ if source == activate_evt_fd => self.handle_activate_event(event_manager),
+                _ => warn!(
+                    "{}: unexpected event received: {source:?}",
+                    self.device_name
+                ),
+            }
+        } else if source == activate_evt_fd {
+            // Allow activation event even before device is activated
+            self.handle_activate_event(event_manager);
+        } else {
+            warn!(
+                "{}: device not yet activated, spurious event received: {source:?}",
+                self.device_name
+            );
+        }
+    }
+
+    fn interest_list(&self) -> Vec<EpollEvent> {
+
vec![EpollEvent::new(
+            EventSet::IN,
+            self.activate_evt.as_raw_fd() as u64,
+        )]
+    }
+}
diff --git a/src/devices/src/virtio/vhost_user/mod.rs b/src/devices/src/virtio/vhost_user/mod.rs
new file mode 100644
index 000000000..96e0c6883
--- /dev/null
+++ b/src/devices/src/virtio/vhost_user/mod.rs
@@ -0,0 +1,11 @@
+// Copyright 2026, Red Hat Inc. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+//! Vhost-user device implementations for libkrun.
+//!
+//! This module provides vhost-user frontend support, allowing virtio devices
+//! to run in separate processes for better isolation and flexibility.
+
+mod device;
+
+pub use device::VhostUserDevice;
diff --git a/src/libkrun/Cargo.toml b/src/libkrun/Cargo.toml
index 282216abb..714f997fe 100644
--- a/src/libkrun/Cargo.toml
+++ b/src/libkrun/Cargo.toml
@@ -17,6 +17,7 @@ snd = []
 input = ["krun_input", "vmm/input", "devices/input"]
 virgl_resource_map2 = []
 aws-nitro = [ "dep:aws-nitro", "dep:nitro-enclaves" ]
+vhost-user = ["vmm/vhost-user", "devices/vhost-user"]
 
 [dependencies]
 crossbeam-channel = ">=0.5.15"
@@ -38,8 +39,8 @@ rand = "0.9.2"
 hvf = { path = "../hvf" }
 
 [target.'cfg(target_os = "linux")'.dependencies]
-kvm-bindings = { version = "0.12", features = ["fam-wrappers"] }
-kvm-ioctls = "0.22"
+kvm-bindings = { version = "0.14", features = ["fam-wrappers"] }
+kvm-ioctls = "0.24"
 aws-nitro = { path = "../aws_nitro", optional = true }
 nitro-enclaves = { version = "0.5.0", optional = true }
 vm-memory = { version = "0.17", features = ["backend-mmap"] }
diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs
index ceae52c80..962e8ed3f 100644
--- a/src/libkrun/src/lib.rs
+++ b/src/libkrun/src/lib.rs
@@ -70,6 +70,9 @@ use krun_input::{InputConfigBackend, InputEventProviderBackend};
 const KRUN_SUCCESS: i32 = 0;
 // Maximum number of arguments/environment variables we allow
 const MAX_ARGS: usize = 4096;
+/// Maximum number of virtqueues allowed by virtio spec (16-bit queue index: 0-65535)
+#[cfg(feature = "vhost-user")]
+const VIRTIO_MAX_QUEUES: usize = 65536;
 
 // krunfw library name for each context
 #[cfg(all(target_os = "linux", not(feature = "tee")))]
@@ -1778,6 +1781,101 @@ pub unsafe extern "C" fn krun_set_snd_device(ctx_id: u32, enable: bool) -> i32 {
     KRUN_SUCCESS
 }
 
+/// Queues a vhost-user device to be attached when the microVM is built.
+///
+/// # Safety
+///
+/// `socket_path` and a non-NULL `name` must be NUL-terminated strings. A non-NULL `queue_sizes`
+/// must hold `num_queues` entries, or be terminated by a 0 entry when `num_queues` is 0.
+#[allow(clippy::missing_safety_doc)]
+#[no_mangle]
+#[cfg(feature = "vhost-user")]
+pub unsafe extern "C" fn krun_add_vhost_user_device(
+    ctx_id: u32,
+    device_type: u32,
+    socket_path: *const c_char,
+    name: *const c_char,
+    num_queues: u16,
+    queue_sizes: *const u16,
+) -> i32 {
+    use vmm::resources::VhostUserDeviceConfig;
+
+    // CStr::from_ptr on NULL is undefined behaviour; reject it like any other bad path.
+    if socket_path.is_null() {
+        return -libc::EINVAL;
+    }
+
+    let socket_path_str = match CStr::from_ptr(socket_path).to_str() {
+        Ok(s) => s,
+        Err(_) => return -libc::EINVAL,
+    };
+
+    if socket_path_str.is_empty() {
+        return -libc::EINVAL;
+    }
+
+    let name_opt = if name.is_null() {
+        None
+    } else {
+        match CStr::from_ptr(name).to_str() {
+            Ok(s) if !s.is_empty() => Some(s.to_string()),
+            _ => None,
+        }
+    };
+
+    let queue_sizes_vec = if queue_sizes.is_null() {
+        Vec::new()
+    } else if num_queues == 0 {
+        // Auto-detect mode: read queue_sizes until we hit 0 (sentinel)
+        let mut sizes = Vec::new();
+        let mut i = 0;
+        loop {
+            let size = *queue_sizes.add(i);
+            if size == 0 {
+                break;
+            }
+            sizes.push(size);
+            i += 1;
+
+            // Safety: prevent infinite loop if user forgets sentinel terminator
+            if i >= VIRTIO_MAX_QUEUES {
+                return -libc::EINVAL;
+            }
+        }
+        sizes
+    } else {
+        std::slice::from_raw_parts(queue_sizes, num_queues as usize).to_vec()
+    };
+
+    match CTX_MAP.lock().unwrap().entry(ctx_id) {
+        Entry::Occupied(mut ctx_cfg) => {
+            let cfg = ctx_cfg.get_mut();
+            cfg.vmr.vhost_user_devices.push(VhostUserDeviceConfig {
+                device_type,
+                socket_path: socket_path_str.to_string(),
+                name: name_opt,
+                num_queues,
+                queue_sizes: queue_sizes_vec,
+            });
+            KRUN_SUCCESS
+        }
+        Entry::Vacant(_) => -libc::ENOENT,
+    }
+}
+
+#[allow(clippy::missing_safety_doc)]
+#[no_mangle]
+#[cfg(not(feature = "vhost-user"))]
+pub unsafe extern "C" fn
krun_add_vhost_user_device( + _ctx_id: u32, + _device_type: u32, + _socket_path: *const c_char, + _name: *const c_char, + _num_queues: u16, + _queue_sizes: *const u16, +) -> i32 { + -libc::ENOTSUP +} + #[allow(unused_assignments)] #[no_mangle] pub extern "C" fn krun_get_shutdown_eventfd(ctx_id: u32) -> i32 { diff --git a/src/rutabaga_gfx/Cargo.toml b/src/rutabaga_gfx/Cargo.toml index 90f6ed4e2..b6b6852c3 100644 --- a/src/rutabaga_gfx/Cargo.toml +++ b/src/rutabaga_gfx/Cargo.toml @@ -26,7 +26,7 @@ remain = "0.2" thiserror = "1.0.23" zerocopy = { version = "0.8.26", features = ["derive"] } log = "0.4" -vmm-sys-util = "0.14" +vmm-sys-util = "0.15" [target.'cfg(unix)'.dependencies] nix = { version = "0.30.1", features = ["event", "feature", "fs", "mman", "socket", "uio", "ioctl"] } diff --git a/src/utils/Cargo.toml b/src/utils/Cargo.toml index 71bbda825..de9ee7c17 100644 --- a/src/utils/Cargo.toml +++ b/src/utils/Cargo.toml @@ -9,11 +9,11 @@ bitflags = "1.2.0" libc = ">=0.2.85" log = "0.4.0" nix = "0.30.1" -vmm-sys-util = "0.14" +vmm-sys-util = "0.15" crossbeam-channel = ">=0.5.15" [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = "0.12", features = ["fam-wrappers"] } +kvm-bindings = { version = "0.14", features = ["fam-wrappers"] } [target.'cfg(target_os = "macos")'.dependencies] nix = { version = "0.30.1", features = ["fs"] } \ No newline at end of file diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index 2bc07ffa8..995c73be2 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -16,6 +16,7 @@ gpu = ["krun_display"] snd = [] input = ["krun_input"] aws-nitro = [] +vhost-user = ["devices/vhost-user"] [dependencies] crossbeam-channel = ">=0.5.15" @@ -25,7 +26,7 @@ linux-loader = { version = "0.13.2", features = ["bzimage", "elf", "pe"] } log = "0.4.0" nix = { version = "0.30.1", features = ["fs", "term"] } vm-memory = { version = "0.17.0", features = ["backend-mmap"] } -vmm-sys-util = "0.14" +vmm-sys-util = "0.15" krun_display = { 
path = "../krun_display", optional = true, features = ["bindgen_clang_runtime"] } krun_input = { path = "../krun_input", optional = true, features = ["bindgen_clang_runtime"] } @@ -51,8 +52,8 @@ zstd = "0.13" [target.'cfg(target_os = "linux")'.dependencies] tdx = { version = "0.1.0", optional = true } -kvm-bindings = { version = "0.12", features = ["fam-wrappers"] } -kvm-ioctls = "0.22" +kvm-bindings = { version = "0.14", features = ["fam-wrappers"] } +kvm-ioctls = "0.24" [target.'cfg(target_os = "macos")'.dependencies] hvf = { path = "../hvf" } diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 3072d0349..662a82d75 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -51,6 +51,8 @@ use devices::virtio::{port_io, MmioTransport, PortDescription, VirtioDevice, Vso use kbs_types::Tee; use crate::device_manager; +#[cfg(all(feature = "vhost-user", target_os = "linux"))] +use crate::resources::VhostUserDeviceConfig; #[cfg(target_os = "linux")] use crate::signal_handler::register_sigint_handler; #[cfg(target_os = "linux")] @@ -93,6 +95,8 @@ use vm_memory::mmap::MmapRegion; #[cfg(not(any(feature = "tee", feature = "aws-nitro")))] use vm_memory::Address; use vm_memory::Bytes; +#[cfg(all(feature = "vhost-user", target_os = "linux"))] +use vm_memory::FileOffset; #[cfg(not(feature = "aws-nitro"))] use vm_memory::GuestMemory; #[cfg(all(target_arch = "x86_64", not(feature = "tee")))] @@ -209,6 +213,8 @@ pub enum StartMicrovmError { RegisterRngDevice(device_manager::mmio::Error), /// Cannot initialize a MMIO Snd device or add a device to the MMIO Bus. RegisterSndDevice(device_manager::mmio::Error), + /// Cannot initialize a vhost-user device or add a device to the MMIO Bus. + RegisterVhostUserDevice(device_manager::mmio::Error), /// Cannot initialize a MMIO Vsock Device or add a device to the MMIO Bus. RegisterVsockDevice(device_manager::mmio::Error), /// Cannot attest the VM in the Secure Virtualization context. 
@@ -461,6 +467,14 @@ impl Display for StartMicrovmError { "Cannot initialize a MMIO Snd Device or add a device to the MMIO Bus. {err_msg}" ) } + RegisterVhostUserDevice(ref err) => { + let mut err_msg = format!("{err}"); + err_msg = err_msg.replace('\"', ""); + write!( + f, + "Cannot initialize a vhost-user device or add a device to the MMIO Bus. {err_msg}" + ) + } RegisterVsockDevice(ref err) => { let mut err_msg = format!("{err}"); err_msg = err_msg.replace('\"', ""); @@ -973,7 +987,29 @@ pub fn build_microvm( #[cfg(not(feature = "tee"))] attach_balloon_device(&mut vmm, event_manager, intc.clone())?; #[cfg(not(feature = "tee"))] - attach_rng_device(&mut vmm, event_manager, intc.clone())?; + { + #[cfg(all(feature = "vhost-user", target_os = "linux"))] + { + const VIRTIO_ID_RNG: u32 = 4; + for device_config in &vm_resources.vhost_user_devices { + attach_vhost_user_device(&mut vmm, event_manager, intc.clone(), device_config)?; + } + + let has_vhost_user_rng = vm_resources + .vhost_user_devices + .iter() + .any(|dev| dev.device_type == VIRTIO_ID_RNG); + + if !has_vhost_user_rng { + attach_rng_device(&mut vmm, event_manager, intc.clone())?; + } + } + + #[cfg(not(all(feature = "vhost-user", target_os = "linux")))] + { + attach_rng_device(&mut vmm, event_manager, intc.clone())?; + } + } let mut console_id = 0; if !vm_resources.disable_implicit_console { attach_console_devices( @@ -1331,9 +1367,79 @@ fn load_payload( return Err(StartMicrovmError::MissingKernelConfig); }; - let kernel_region = unsafe { - MmapRegion::build_raw(kernel_host_addr as *mut u8, kernel_size, 0, 0) - .map_err(StartMicrovmError::InvalidKernelBundle)? 
+            #[cfg(all(feature = "vhost-user", target_os = "linux"))]
+            let use_vhost_user = !_vm_resources.vhost_user_devices.is_empty();
+            #[cfg(not(all(feature = "vhost-user", target_os = "linux")))]
+            let use_vhost_user = false;
+
+            let kernel_region = if use_vhost_user {
+                #[cfg(all(feature = "vhost-user", target_os = "linux"))]
+                {
+                    debug!(
+                        "Creating file-backed kernel region for vhost-user (size=0x{:x})",
+                        kernel_size
+                    );
+                    // SAFETY: memfd_create is called with a valid null-terminated C string and valid flags.
+                    // File descriptor ownership is transferred to File::from_raw_fd below.
+                    let memfd = unsafe {
+                        let fd = libc::memfd_create(
+                            b"kernel\0".as_ptr() as *const libc::c_char,
+                            libc::MFD_CLOEXEC,
+                        );
+                        if fd < 0 {
+                            // Read errno exactly once: logging may issue syscalls that
+                            // overwrite it before a second read.
+                            let err = io::Error::last_os_error();
+                            error!("Failed to create memfd for kernel: {:?}", err);
+                            return Err(StartMicrovmError::GuestMemoryMmap(format!(
+                                "memfd_create failed: {:?}",
+                                err
+                            )));
+                        }
+                        if libc::ftruncate(fd, kernel_size as i64) < 0 {
+                            // Capture errno before logging and close() below, either of
+                            // which can replace it with an unrelated value.
+                            let err = io::Error::last_os_error();
+                            error!("Failed to ftruncate kernel memfd: {:?}", err);
+                            libc::close(fd);
+                            return Err(StartMicrovmError::GuestMemoryMmap(format!(
+                                "ftruncate failed: {:?}",
+                                err
+                            )));
+                        }
+                        debug!("Created kernel memfd with fd={}", fd);
+                        File::from_raw_fd(fd)
+                    };
+
+                    let file_offset = FileOffset::new(memfd, 0);
+                    let region = MmapRegion::from_file(file_offset, kernel_size)
+                        .map_err(StartMicrovmError::InvalidKernelBundle)?;
+
+                    // SAFETY: kernel_host_addr points to valid kernel data of size kernel_size,
+                    // provided by the kernel bundle loader.
+                    let kernel_data = unsafe {
+                        std::slice::from_raw_parts(kernel_host_addr as *const u8, kernel_size)
+                    };
+                    // SAFETY: Both source (kernel_data) and destination (region) are valid for
+                    // kernel_size bytes. Regions don't overlap as dest is newly allocated memfd-backed
+                    // memory and source is from kernel bundle.
+                    unsafe {
+                        let dest = region.as_ptr() as *mut u8;
+                        std::ptr::copy_nonoverlapping(kernel_data.as_ptr(), dest, kernel_size);
+                    }
+                    debug!("Copied kernel data to file-backed region");
+
+                    region
+                }
+                #[cfg(not(all(feature = "vhost-user", target_os = "linux")))]
+                unreachable!()
+            } else {
+                // SAFETY: kernel_host_addr points to valid kernel data of size kernel_size.
+                // The memory region is managed by the kernel bundle and remains valid.
+                unsafe {
+                    MmapRegion::build_raw(kernel_host_addr as *mut u8, kernel_size, 0, 0)
+                        .map_err(StartMicrovmError::InvalidKernelBundle)?
+                }
             };
 
             Ok((
@@ -1502,10 +1608,74 @@ pub fn create_guest_memory(
             .map_err(StartMicrovmError::ShmCreate)?;
     }
 
+    // For vhost-user devices, we need file-backed memory so the backend can mmap it
+    #[cfg(all(feature = "vhost-user", target_os = "linux"))]
+    let use_vhost_user = !vm_resources.vhost_user_devices.is_empty();
+    #[cfg(not(all(feature = "vhost-user", target_os = "linux")))]
+    let use_vhost_user = false;
+
+    // Add SHM regions before creating guest memory
     arch_mem_regions.extend(shm_manager.regions());
 
-    let guest_mem = GuestMemoryMmap::from_ranges(&arch_mem_regions)
-        .map_err(|e| StartMicrovmError::GuestMemoryMmap(format!("{e:?}")))?;
+    let guest_mem = if use_vhost_user {
+        #[cfg(all(feature = "vhost-user", target_os = "linux"))]
+        {
+            debug!(
+                "Creating file-backed memory for vhost-user (regions: {})",
+                arch_mem_regions.len()
+            );
+            // Create file-backed memory regions using memfd
+            let regions_with_files: Vec<_> = arch_mem_regions
+                .iter()
+                .map(|(addr, size)| {
+                    debug!(
+                        " Creating memfd for region: addr=0x{:x}, size=0x{:x}",
+                        addr.0, size
+                    );
+                    // SAFETY: memfd_create is called with a valid null-terminated C string and valid flags.
+                    // File descriptor ownership is transferred to File::from_raw_fd below.
+                    let memfd = unsafe {
+                        let fd = libc::memfd_create(
+                            b"guest_mem\0".as_ptr() as *const libc::c_char,
+                            libc::MFD_CLOEXEC,
+                        );
+                        if fd < 0 {
+                            let err = io::Error::last_os_error();
+                            error!("Failed to create memfd: {:?}", err);
+                            return Err(err);
+                        }
+                        if libc::ftruncate(fd, *size as i64) < 0 {
+                            // Capture errno before close() below can replace it.
+                            let err = io::Error::last_os_error();
+                            error!("Failed to ftruncate memfd: {:?}", err);
+                            libc::close(fd);
+                            return Err(err);
+                        }
+                        debug!(" Created memfd with fd={}", fd);
+                        File::from_raw_fd(fd)
+                    };
+
+                    let file_offset = FileOffset::new(memfd, 0);
+                    Ok((*addr, *size, Some(file_offset)))
+                })
+                .collect::<Result<Vec<_>, io::Error>>()
+                .map_err(|e| {
+                    StartMicrovmError::GuestMemoryMmap(format!("memfd creation failed: {e:?}"))
+                })?;
+
+            debug!(
+                "Created {} file-backed memory regions",
+                regions_with_files.len()
+            );
+            GuestMemoryMmap::from_ranges_with_files(&regions_with_files)
+                .map_err(|e| StartMicrovmError::GuestMemoryMmap(format!("{e:?}")))?
+        }
+        #[cfg(not(all(feature = "vhost-user", target_os = "linux")))]
+        unreachable!()
+    } else {
+        GuestMemoryMmap::from_ranges(&arch_mem_regions)
+            .map_err(|e| StartMicrovmError::GuestMemoryMmap(format!("{e:?}")))?
+    };
 
     let (guest_mem, entry_addr, initrd_config, cmdline) =
         load_payload(vm_resources, guest_mem, &arch_mem_info, payload)?;
@@ -2252,6 +2422,41 @@ fn attach_rng_device(
     Ok(())
 }
 
+#[cfg(not(feature = "tee"))]
+#[cfg(all(feature = "vhost-user", target_os = "linux"))]
+fn attach_vhost_user_device(
+    vmm: &mut Vmm,
+    event_manager: &mut EventManager,
+    intc: IrqChip,
+    device_config: &VhostUserDeviceConfig,
+) -> std::result::Result<(), StartMicrovmError> {
+    use self::StartMicrovmError::*;
+
+    let device_name = device_config
+        .name
+        .clone()
+        .unwrap_or_else(|| format!("vhost-user-{}", device_config.device_type));
+
+    let device = Arc::new(Mutex::new(
+        devices::virtio::VhostUserDevice::new(
+            &device_config.socket_path,
+            device_config.device_type,
+            device_name.clone(),
+            device_config.num_queues,
+            &device_config.queue_sizes,
+        )
+        .map_err(|e| RegisterVhostUserDevice(device_manager::mmio::Error::VhostUserDevice(e)))?,
+    ));
+
+    event_manager
+        .add_subscriber(device.clone())
+        .map_err(RegisterEvent)?;
+
+    attach_mmio_device(vmm, device_name, intc.clone(), device).map_err(RegisterVhostUserDevice)?;
+
+    Ok(())
+}
+
 #[cfg(feature = "gpu")]
 #[allow(clippy::too_many_arguments)]
 fn attach_gpu_device(
diff --git a/src/vmm/src/device_manager/kvm/mmio.rs b/src/vmm/src/device_manager/kvm/mmio.rs
index e739afb42..f2e0a687b 100644
--- a/src/vmm/src/device_manager/kvm/mmio.rs
+++ b/src/vmm/src/device_manager/kvm/mmio.rs
@@ -41,6 +41,9 @@ pub enum Error {
     DeviceNotFound,
     /// Failed to update the mmio device.
     UpdateFailed,
+    /// Failed to create vhost-user device.
+    #[cfg(all(feature = "vhost-user", target_os = "linux"))]
+    VhostUserDevice(io::Error),
 }
 
 impl fmt::Display for Error {
@@ -59,6 +62,8 @@ impl fmt::Display for Error {
             Error::RegisterIrqFd(ref e) => write!(f, "failed to register irqfd: {e}"),
             Error::DeviceNotFound => write!(f, "the device couldn't be found"),
             Error::UpdateFailed => write!(f, "failed to update the mmio device"),
+            #[cfg(all(feature = "vhost-user", target_os = "linux"))]
+            Error::VhostUserDevice(ref e) => write!(f, "failed to create vhost-user device: {e}"),
         }
     }
 }
diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs
index d8d0fff24..f61554ce8 100644
--- a/src/vmm/src/resources.rs
+++ b/src/vmm/src/resources.rs
@@ -40,6 +40,22 @@ type Result<E> = std::result::Result<(), E>;
 // Re-export TsiFlags from devices crate
 pub use devices::virtio::TsiFlags;
 
+#[cfg(feature = "vhost-user")]
+/// Configuration for a vhost-user device.
+#[derive(Debug, Clone)]
+pub struct VhostUserDeviceConfig {
+    /// Virtio device type ID (e.g., 4 for RNG, 25 for sound, 36 for CAN)
+    pub device_type: u32,
+    /// Path to the vhost-user Unix domain socket
+    pub socket_path: String,
+    /// Device name for logging/debugging (None = auto-generate from type)
+    pub name: Option<String>,
+    /// Number of virtqueues (0 = ask the backend for its queue count)
+    pub num_queues: u16,
+    /// Size of each queue (missing entries reuse the last one; empty = 256)
+    pub queue_sizes: Vec<u16>,
+}
+
 /// Errors encountered when configuring microVM resources.
 #[derive(Debug)]
 pub enum Error {
@@ -173,6 +189,9 @@ pub struct VmResources {
     #[cfg(feature = "snd")]
     /// Enable the virtio-snd device.
     pub snd_device: bool,
+    #[cfg(feature = "vhost-user")]
+    /// Vhost-user device configurations
+    pub vhost_user_devices: Vec<VhostUserDeviceConfig>,
     /// File to send console output.
     pub console_output: Option,
     /// SMBIOS OEM Strings
@@ -423,6 +442,8 @@ mod tests {
             input_backends: Vec::new(),
             #[cfg(feature = "snd")]
             snd_device: false,
+            #[cfg(feature = "vhost-user")]
+            vhost_user_devices: Vec::new(),
             console_output: None,
             smbios_oem_strings: None,
             nested_enabled: false,