diff --git a/Cargo.lock b/Cargo.lock index df6a01be..6262f5a6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -249,6 +249,16 @@ version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" +[[package]] +name = "bstr" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "bumpalo" version = "3.19.0" @@ -443,6 +453,8 @@ dependencies = [ "env_logger", "fact-api", "fact-ebpf", + "glob", + "globset", "http-body-util", "hyper", "hyper-tls", @@ -574,18 +586,6 @@ dependencies = [ "pin-utils", ] -[[package]] -name = "getrandom" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" -dependencies = [ - "cfg-if", - "libc", - "r-efi", - "wasi 0.14.2+wasi-0.2.4", -] - [[package]] name = "getrandom" version = "0.4.1" @@ -607,9 +607,22 @@ checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "glob" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "globset" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] [[package]] name = "h2" @@ -939,7 +952,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" dependencies = [ "libc", - "wasi 
0.11.1+wasi-snapshot-preview1", + "wasi", "windows-sys 0.59.0", ] @@ -1490,7 +1503,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" dependencies = [ "fastrand", - "getrandom 0.3.3", + "getrandom", "once_cell", "rustix", "windows-sys 0.61.2", @@ -1744,7 +1757,7 @@ version = "1.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" dependencies = [ - "getrandom 0.4.1", + "getrandom", "js-sys", "wasm-bindgen", ] @@ -1776,15 +1789,6 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" -[[package]] -name = "wasi" -version = "0.14.2+wasi-0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" -dependencies = [ - "wit-bindgen-rt", -] - [[package]] name = "wasip2" version = "1.0.2+wasi-0.2.9" @@ -2003,15 +2007,6 @@ dependencies = [ "wit-parser", ] -[[package]] -name = "wit-bindgen-rt" -version = "0.39.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" -dependencies = [ - "bitflags", -] - [[package]] name = "wit-bindgen-rust" version = "0.51.0" diff --git a/Cargo.toml b/Cargo.toml index 742c25f4..01330200 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,8 @@ aya = { version = "0.13.1", default-features = false } anyhow = { version = "1", default-features = false, features = ["std", "backtrace"] } clap = { version = "4.5.41", features = ["derive", "env"] } env_logger = { version = "0.11.5", default-features = false, features = ["humantime"] } +glob = "0.3.3" +globset = "0.4.18" http-body-util = "0.1.3" hyper = { version = "1.6.0", default-features = false } 
hyper-tls = "0.6.0" diff --git a/README.md b/README.md index f4a6c81d..20f712a2 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ In order to run these tests as part of the unit test suite y use the following command: ```shell -cargo test --config 'target."cfg(all())".runner="sudo -E" --features=bpf-test +cargo test --config 'target."cfg(all())".runner="sudo -E"' --features=bpf-test ``` ## Create compile_commands.json diff --git a/fact-ebpf/src/bpf/file.h b/fact-ebpf/src/bpf/file.h index dd0639de..6c420095 100644 --- a/fact-ebpf/src/bpf/file.h +++ b/fact-ebpf/src/bpf/file.h @@ -6,12 +6,13 @@ #include "builtins.h" #include "types.h" #include "maps.h" +#include "inode.h" #include #include // clang-format on -__always_inline static bool is_monitored(struct bound_path_t* path) { +__always_inline static bool path_is_monitored(struct bound_path_t* path) { if (!filter_by_prefix()) { // no path configured, allow all return true; @@ -30,3 +31,19 @@ __always_inline static bool is_monitored(struct bound_path_t* path) { path->len = len; return res; } + +__always_inline static bool is_monitored(inode_key_t inode, struct bound_path_t* path, inode_key_t** submit) { + const inode_value_t* volatile inode_value = inode_get(&inode); + + switch (inode_is_monitored(inode_value)) { + case NOT_MONITORED: + /* The inode is not tracked, so never hand it to userspace: a + * NULL inode marks a path-prefix-only match that still has to + * pass the full glob check there. */ + *submit = NULL; + return path_is_monitored(path); + case MONITORED: + break; + } + return true; +} diff --git a/fact-ebpf/src/bpf/main.c b/fact-ebpf/src/bpf/main.c index 3c71fd71..e0a23522 100644 --- a/fact-ebpf/src/bpf/main.c +++ b/fact-ebpf/src/bpf/main.c @@ -45,18 +45,13 @@ int BPF_PROG(trace_file_open, struct file* file) { } inode_key_t inode_key = inode_to_key(file->f_inode); - const inode_value_t* inode = inode_get(&inode_key); - switch (inode_is_monitored(inode)) { - case NOT_MONITORED: - if (!is_monitored(path)) { - goto ignored; - } - break; - case MONITORED: - break; + inode_key_t* inode_to_submit = &inode_key; + + if 
(!is_monitored(inode_key, path, &inode_to_submit)) { + goto ignored; } - submit_open_event(&m->file_open, event_type, path->path, &inode_key); + submit_open_event(&m->file_open, event_type, path->path, inode_to_submit); return 0; @@ -82,24 +77,16 @@ int BPF_PROG(trace_path_unlink, struct path* dir, struct dentry* dentry) { } inode_key_t inode_key = inode_to_key(dentry->d_inode); - const inode_value_t* inode = inode_get(&inode_key); - - switch (inode_is_monitored(inode)) { - case NOT_MONITORED: - if (!is_monitored(path)) { - m->path_unlink.ignored++; - return 0; - } - break; + inode_key_t* inode_to_submit = &inode_key; - case MONITORED: - inode_remove(&inode_key); - break; + if (!is_monitored(inode_key, path, &inode_to_submit)) { + m->path_unlink.ignored++; + return 0; } submit_unlink_event(&m->path_unlink, path->path, - &inode_key); + inode_to_submit); return 0; } @@ -120,24 +107,17 @@ int BPF_PROG(trace_path_chmod, struct path* path, umode_t mode) { } inode_key_t inode_key = inode_to_key(path->dentry->d_inode); - const inode_value_t* inode = inode_get(&inode_key); - - switch (inode_is_monitored(inode)) { - case NOT_MONITORED: - if (!is_monitored(bound_path)) { - m->path_chmod.ignored++; - return 0; - } - break; + inode_key_t* inode_to_submit = &inode_key; - case MONITORED: - break; + if (!is_monitored(inode_key, bound_path, &inode_to_submit)) { + m->path_chmod.ignored++; + return 0; } umode_t old_mode = BPF_CORE_READ(path, dentry, d_inode, i_mode); submit_mode_event(&m->path_chmod, bound_path->path, - &inode_key, + inode_to_submit, mode, old_mode); @@ -164,18 +144,11 @@ int BPF_PROG(trace_path_chown, struct path* path, unsigned long long uid, unsign } inode_key_t inode_key = inode_to_key(path->dentry->d_inode); - const inode_value_t* inode = inode_get(&inode_key); - - switch (inode_is_monitored(inode)) { - case NOT_MONITORED: - if (!is_monitored(bound_path)) { - m->path_chown.ignored++; - return 0; - } - break; + inode_key_t* inode_to_submit = &inode_key; - case 
MONITORED: - break; + if (!is_monitored(inode_key, bound_path, &inode_to_submit)) { + m->path_chown.ignored++; + return 0; } struct dentry* d = BPF_CORE_READ(path, dentry); @@ -184,7 +157,7 @@ int BPF_PROG(trace_path_chown, struct path* path, unsigned long long uid, unsign submit_ownership_event(&m->path_chown, bound_path->path, - &inode_key, + inode_to_submit, uid, gid, old_uid, @@ -217,14 +190,15 @@ int BPF_PROG(trace_path_rename, struct path* old_dir, } inode_key_t old_inode = inode_to_key(old_dentry->d_inode); - const inode_value_t* volatile old_inode_value = inode_get(&old_inode); inode_key_t new_inode = inode_to_key(new_dentry->d_inode); - const inode_value_t* volatile new_inode_value = inode_get(&new_inode); - if (inode_is_monitored(old_inode_value) == NOT_MONITORED && - inode_is_monitored(new_inode_value) == NOT_MONITORED && - !is_monitored(old_path) && - !is_monitored(new_path)) { + inode_key_t* old_inode_submit = &old_inode; + inode_key_t* new_inode_submit = &new_inode; + + bool old_monitored = is_monitored(old_inode, old_path, &old_inode_submit); + bool new_monitored = is_monitored(new_inode, new_path, &new_inode_submit); + + if (!old_monitored && !new_monitored) { m->path_rename.ignored++; return 0; } @@ -232,8 +206,8 @@ int BPF_PROG(trace_path_rename, struct path* old_dir, submit_rename_event(&m->path_rename, new_path->path, old_path->path, - &old_inode, - &new_inode); + old_inode_submit, + new_inode_submit); return 0; error: diff --git a/fact-ebpf/src/lib.rs b/fact-ebpf/src/lib.rs index 0bf7a1ad..bd84ee08 100644 --- a/fact-ebpf/src/lib.rs +++ b/fact-ebpf/src/lib.rs @@ -30,17 +30,22 @@ impl TryFrom<&PathBuf> for path_prefix_t { prefix: value.display().to_string(), }); }; - let len = if filename.len() > LPM_SIZE_MAX as usize { - LPM_SIZE_MAX as usize - } else { - filename.len() - }; + + // Take the start of the path until the first occurrence of a wildcard + // character. 
This is used as a filter in the kernel in cases where + // the inode has failed to match. The full wildcard string is used + // for further processing in userspace. + // + // unwrap is safe here - if there are no matches, the full string is the + // only item in the iterator + let filename_prefix = filename.split(['*', '?', '[', '{']).next().unwrap(); + let len = filename_prefix.len().min(LPM_SIZE_MAX as usize); unsafe { let mut cfg: path_prefix_t = std::mem::zeroed(); memcpy( cfg.path.as_mut_ptr() as *mut _, - filename.as_ptr() as *const _, + filename_prefix.as_ptr() as *const _, len, ); cfg.bit_len = (len * 8) as u32; @@ -63,6 +68,12 @@ impl PartialEq for path_prefix_t { unsafe impl Pod for path_prefix_t {} +impl inode_key_t { + pub fn empty(&self) -> bool { + self.inode == 0 && self.dev == 0 + } +} + impl PartialEq for inode_key_t { fn eq(&self, other: &Self) -> bool { self.inode == other.inode && self.dev == other.dev diff --git a/fact/Cargo.toml b/fact/Cargo.toml index 3b84db24..64218b33 100644 --- a/fact/Cargo.toml +++ b/fact/Cargo.toml @@ -10,6 +10,8 @@ anyhow = { workspace = true } aya = { workspace = true } clap = { workspace = true } env_logger = { workspace = true } +glob = { workspace = true } +globset = { workspace = true } http-body-util = { workspace = true } hyper = { workspace = true } hyper-tls = { workspace = true } diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index 71077cb8..f9b6ca3d 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -7,6 +7,7 @@ use aya::{ Btf, Ebpf, }; use checks::Checks; +use globset::{Glob, GlobSet, GlobSetBuilder}; use libc::c_char; use log::{error, info}; use tokio::{ @@ -30,6 +31,8 @@ pub struct Bpf { paths: Vec, paths_config: watch::Receiver>, + + paths_globset: GlobSet, } impl Bpf { @@ -61,6 +64,7 @@ impl Bpf { tx, paths, paths_config, + paths_globset: GlobSet::empty(), }; bpf.load_paths()?; @@ -127,11 +131,23 @@ impl Bpf { // Add the new prefixes let mut new_paths = 
Vec::with_capacity(paths_config.len()); + let mut builder = GlobSetBuilder::new(); for p in paths_config.iter() { + let Some(glob_str) = p.to_str() else { + bail!("failed to convert path {} to string", p.display()); + }; + + // Surface an invalid pattern as an error instead of panicking, + // so a bad config reload cannot take the whole agent down. + let glob = Glob::new(glob_str) + .with_context(|| format!("invalid glob {}", glob_str))?; + builder.add(glob); + let prefix = path_prefix_t::try_from(p)?; path_prefix.insert(&prefix.into(), 0, 0)?; new_paths.push(prefix); } + self.paths_globset = builder.build()?; // Remove old prefixes for p in self.paths.iter().filter(|p| !new_paths.contains(p)) { @@ -193,7 +209,21 @@ impl Bpf { while let Some(event) = ringbuf.next() { let event: &event_t = unsafe { &*(event.as_ptr() as *const _) }; let event = match Event::try_from(event) { - Ok(event) => event, + Ok(event) => { + // With wildcards, the kernel can only match on the inode and + // then the longest non-wildcard prefix (e.g. for /etc/**/*.conf, + // the kernel matches up to /etc/) + // + // The kernel sets inode to 0 when it matched via path prefix only. 
+ // so we only need to perform a glob match against the filename + if !event.get_inode().empty() || + self.paths_globset.is_match(event.get_filename()) { + event + } else { + event_counter.dropped(); + continue; + } + }, Err(e) => { error!("Failed to parse event: '{e}'"); event_counter.dropped(); @@ -253,7 +283,7 @@ mod bpf_tests { let monitored_path = env!("CARGO_MANIFEST_DIR"); let monitored_path = PathBuf::from(monitored_path); - let paths = vec![monitored_path.clone()]; + let paths = vec![PathBuf::from(format!("{}/**/*", monitored_path.display()))]; let mut config = FactConfig::default(); config.set_paths(paths); let reloader = Reloader::from(config); diff --git a/fact/src/event/mod.rs b/fact/src/event/mod.rs index c85189d5..080274e5 100644 --- a/fact/src/event/mod.rs +++ b/fact/src/event/mod.rs @@ -150,6 +150,28 @@ impl Event { } } + pub fn get_filename(&self) -> &PathBuf { + match &self.file { + FileData::Open(data) => &data.filename, + FileData::Creation(data) => &data.filename, + FileData::Unlink(data) => &data.filename, + FileData::Chmod(data) => &data.inner.filename, + FileData::Chown(data) => &data.inner.filename, + FileData::Rename(data) => &data.new.filename, + } + } + + pub fn get_host_path(&self) -> &PathBuf { + match &self.file { + FileData::Open(data) => &data.host_file, + FileData::Creation(data) => &data.host_file, + FileData::Unlink(data) => &data.host_file, + FileData::Chmod(data) => &data.inner.host_file, + FileData::Chown(data) => &data.inner.host_file, + FileData::Rename(data) => &data.new.host_file, + } + } + /// Set the `host_file` field of the event to the one provided. 
/// /// In the case of operations that involve two paths, like rename, diff --git a/fact/src/host_scanner.rs b/fact/src/host_scanner.rs index 2adbb58d..150c2bfa 100644 --- a/fact/src/host_scanner.rs +++ b/fact/src/host_scanner.rs @@ -25,7 +25,7 @@ use std::{ sync::Arc, }; -use anyhow::Context; +use anyhow::{Context, bail}; use aya::maps::MapData; use fact_ebpf::{inode_key_t, inode_value_t}; use log::{debug, info, warn}; @@ -75,8 +75,8 @@ impl HostScanner { fn scan(&self) -> anyhow::Result<()> { debug!("Host scan started"); - for path in self.config.borrow().iter() { - let path = host_info::prepend_host_mount(path); + for pattern in self.config.borrow().iter() { + let path = host_info::prepend_host_mount(pattern); self.scan_inner(&path)?; } debug!("Host scan done"); @@ -85,15 +85,21 @@ impl HostScanner { } fn scan_inner(&self, path: &Path) -> anyhow::Result<()> { - if path.is_dir() { - for entry in path.read_dir()?.flatten() { - let entry = entry.path(); - self.scan_inner(&entry) - .with_context(|| format!("Failed to scan {}", entry.display()))?; + let Some(glob_str) = path.to_str() else { + bail!("invalid path {}", path.display()); + }; + + for entry in glob::glob(glob_str)? 
{ + match entry { + Ok(path) => { + if path.is_file() { + self.update_entry(path.as_path()).with_context(|| { + format!("Failed to update entry for {}", path.display()) + })?; + } + } + Err(e) => return Err(e.into()), } - } else if path.is_file() { - self.update_entry(path) - .with_context(|| format!("Failed to update entry for {}", path.display()))?; } Ok(()) } diff --git a/tests/conftest.py b/tests/conftest.py index 143167bd..e00e4c5f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -143,7 +143,7 @@ def dump_logs(container, file): def fact_config(request, monitored_dir, logs_dir): cwd = os.getcwd() config = { - 'paths': [monitored_dir, '/mounted', '/container-dir'], + 'paths': [f'{monitored_dir}/**/*', '/mounted/**/*', '/container-dir/**/*'], 'grpc': { 'url': 'http://127.0.0.1:9999', }, diff --git a/tests/test_config_hotreload.py b/tests/test_config_hotreload.py index 9a875228..4afeac8f 100644 --- a/tests/test_config_hotreload.py +++ b/tests/test_config_hotreload.py @@ -136,7 +136,7 @@ def test_paths(fact, fact_config, monitored_dir, ignored_dir, server): server.wait_events([e]) config, config_file = fact_config - config['paths'] = [ignored_dir] + config['paths'] = [f'{ignored_dir}/**/*'] reload_config(fact, config, config_file, delay=0.5) # At this point, the event in the ignored directory should show up @@ -173,7 +173,7 @@ def test_paths_addition(fact, fact_config, monitored_dir, ignored_dir, server): server.wait_events([e]) config, config_file = fact_config - config['paths'] = [monitored_dir, ignored_dir] + config['paths'] = [f'{monitored_dir}/**/*', f'{ignored_dir}/**/*'] reload_config(fact, config, config_file, delay=0.5) # At this point, the event in the ignored directory should show up diff --git a/tests/test_wildcard.py b/tests/test_wildcard.py new file mode 100644 index 00000000..fd1728f3 --- /dev/null +++ b/tests/test_wildcard.py @@ -0,0 +1,128 @@ +from time import sleep +import os + +import pytest +import yaml + +from event import Event, 
EventType, Process + + +@pytest.fixture +def wildcard_config(fact, fact_config, monitored_dir): + config, config_file = fact_config + config['paths'] = [ + f'{monitored_dir}/**/*.txt', + f'{monitored_dir}/*.conf', + f'{monitored_dir}/**/test-*.log', + ] + with open(config_file, 'w') as f: + yaml.dump(config, f) + + # reload the config + fact.kill('SIGHUP') + sleep(0.1) + return config, config_file + + +def test_extension_wildcard(wildcard_config, monitored_dir, server): + process = Process.from_proc() + + # Should not match any pattern + log_file = os.path.join(monitored_dir, 'app.log') + with open(log_file, 'w') as f: + f.write('This should be ignored') + + txt_file = os.path.join(monitored_dir, 'document.txt') + with open(txt_file, 'w') as f: + f.write('This should be captured') + + e = Event(process=process, event_type=EventType.CREATION, + file=txt_file, host_path='') + + server.wait_events([e]) + + +def test_prefix_wildcard(wildcard_config, monitored_dir, server): + process = Process.from_proc() + + # Wrong prefix - should not match + app_log = os.path.join(monitored_dir, 'app-test.log') + with open(app_log, 'w') as f: + f.write('This should be ignored') + + test_log = os.path.join(monitored_dir, 'test-app.log') + with open(test_log, 'w') as f: + f.write('This should be captured') + + e = Event(process=process, event_type=EventType.CREATION, + file=test_log, host_path='') + + server.wait_events([e]) + + +def test_recursive_wildcard(wildcard_config, monitored_dir, server): + process = Process.from_proc() + + nested_dir = os.path.join(monitored_dir, 'level1', 'level2') + os.makedirs(nested_dir, exist_ok=True) + + # Different extension - should not match + nested_md = os.path.join(nested_dir, 'readme.md') + with open(nested_md, 'w') as f: + f.write('Should be ignored') + + root_txt = os.path.join(monitored_dir, 'root.txt') + with open(root_txt, 'w') as f: + f.write('Root level txt') + + nested_txt = os.path.join(nested_dir, 'nested.txt') + with open(nested_txt, 
'w') as f: + f.write('Nested txt') + + events = [ + Event(process=process, event_type=EventType.CREATION, + file=root_txt, host_path=''), + Event(process=process, event_type=EventType.CREATION, + file=nested_txt, host_path=''), + ] + + server.wait_events(events) + + +def test_nonrecursive_wildcard(wildcard_config, monitored_dir, server): + process = Process.from_proc() + + fut = os.path.join(monitored_dir, 'app.conf') + with open(fut, 'w') as f: + f.write('This should be captured') + + e = Event(process=process, event_type=EventType.CREATION, + file=fut, host_path='') + + server.wait_events([e]) + + +def test_multiple_patterns(wildcard_config, monitored_dir, server): + process = Process.from_proc() + + # Matches no pattern + conf_file = os.path.join(monitored_dir, 'config.yml') + with open(conf_file, 'w') as f: + f.write('Config file') + + txt_file = os.path.join(monitored_dir, 'notes.txt') + with open(txt_file, 'w') as f: + f.write('Text file') + + log_file = os.path.join(monitored_dir, 'test-output.log') + with open(log_file, 'w') as f: + f.write('Log file') + + events = [ + Event(process=process, event_type=EventType.CREATION, + file=txt_file, host_path=''), + Event(process=process, event_type=EventType.CREATION, + file=log_file, host_path=''), + ] + + server.wait_events(events)