diff --git a/Cargo.lock b/Cargo.lock index df6a01be..d09a4115 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -457,6 +457,7 @@ dependencies = [ "regex", "serde", "serde_json", + "shlex", "tempfile", "tokio", "tokio-native-tls", diff --git a/Cargo.toml b/Cargo.toml index 742c25f4..45b275fc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ prost = "0.14.0" prost-types = "0.14.0" serde = { version = "1.0.219", features = ["derive"] } serde_json = "1.0.142" +shlex = "1.3.0" tokio = { version = "1.40.0", default-features = false, features = [ "fs", "macros", diff --git a/fact/Cargo.toml b/fact/Cargo.toml index 3b84db24..9b1b52e3 100644 --- a/fact/Cargo.toml +++ b/fact/Cargo.toml @@ -27,6 +27,7 @@ prost = { workspace = true } prost-types = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } +shlex = { workspace = true } uuid = { workspace = true } yaml-rust2 = { workspace = true } diff --git a/fact/src/event/process.rs b/fact/src/event/process.rs index 5e42e530..3dc6fd4b 100644 --- a/fact/src/event/process.rs +++ b/fact/src/event/process.rs @@ -192,10 +192,11 @@ impl From for fact_api::ProcessSignal { let container_id = container_id.unwrap_or("".to_string()); - let args = args - .into_iter() - .reduce(|acc, i| acc + " " + &i) - .unwrap_or("".to_owned()); + // try_join can fail if args contain nul bytes, though this should not happen + // since args are parsed from C strings which are nul-terminated + let Ok(args) = shlex::try_join(args.iter().map(|s| s.as_str())) else { + unreachable!(); + }; Self { id: Uuid::new_v4().to_string(), diff --git a/tests/conftest.py b/tests/conftest.py index 143167bd..ccba3650 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -17,45 +17,6 @@ ] -def join_path_with_filename(directory, filename): - """ - Join a directory path with a filename, handling bytes filenames properly. - - When filename is bytes (e.g., containing invalid UTF-8), converts the - directory to bytes before joining to avoid mixing str and bytes. - - Args: - directory: Directory path (str) - filename: Filename (str or bytes) - - Returns: - Joined path (str or bytes, matching the filename type) - """ - if isinstance(filename, bytes): - return os.path.join(os.fsencode(directory), filename) - else: - return os.path.join(directory, filename) - - -def path_to_string(path): - """ - Convert a filesystem path to string, replacing invalid UTF-8 with U+FFFD. - - This matches the behavior of Rust's String::from_utf8_lossy() used in - the fact codebase. - - Args: - path: Filesystem path (str or bytes) - - Returns: - String representation with invalid UTF-8 replaced by replacement character - """ - if isinstance(path, bytes): - return path.decode('utf-8', errors='replace') - else: - return path - - @pytest.fixture def monitored_dir(): """ diff --git a/tests/event.py b/tests/event.py index 91dffd1a..fd7de662 100644 --- a/tests/event.py +++ b/tests/event.py @@ -6,6 +6,7 @@ from internalapi.sensor.collector_pb2 import ProcessSignal from internalapi.sensor.sfa_pb2 import FileActivity +import utils def extract_container_id(cgroup: str) -> str: @@ -86,7 +87,7 @@ def get_id(line: str, wanted_id: str) -> int | None: content = f.read(4096) args = [arg.decode('utf-8') for arg in content.split(b'\x00') if arg] - args = ' '.join(args) + args = utils.rust_style_join(args) with open(os.path.join(proc_dir, 'comm'), 'r') as f: name = f.read().strip() diff --git a/tests/test_editors/test_nvim.py b/tests/test_editors/test_nvim.py index 75eeac17..9f3d095d 100644 --- a/tests/test_editors/test_nvim.py +++ b/tests/test_editors/test_nvim.py @@ -5,13 +5,13 @@ def test_new_file(editor_container, server): fut = '/mounted/test.txt' + cmd = f"nvim {fut} '+:normal iThis is a test' -c x" - editor_container.exec_run( - f"nvim {fut} +':normal iThis is a test' -c x") + editor_container.exec_run(cmd) process = Process.in_container( exe_path='/usr/bin/nvim', - args=f'nvim {fut} +:normal iThis is a test -c x', + args=cmd, name='nvim', container_id=editor_container.id[:12], ) @@ -26,12 +26,12 @@ def test_new_file(editor_container, server): def test_open_file(editor_container, server): fut = '/mounted/test.txt' fut_backup = f'{fut}~' + cmd = f"nvim {fut} '+:normal iThis is a test' -c x" container_id = editor_container.id[:12] # We ensure the file exists before editing. editor_container.exec_run(f'touch {fut}') - editor_container.exec_run( - f"nvim {fut} +':normal iThis is a test' -c x") + editor_container.exec_run(cmd) touch = Process.in_container( exe_path='/usr/bin/touch', @@ -41,7 +41,7 @@ def test_open_file(editor_container, server): ) nvim = Process.in_container( exe_path='/usr/bin/nvim', - args=f'nvim {fut} +:normal iThis is a test -c x', + args=cmd, name='nvim', container_id=container_id, ) @@ -72,13 +72,13 @@ def test_open_file(editor_container, server): def test_new_file_ovfs(editor_container, server): fut = '/container-dir/test.txt' + cmd = f"nvim {fut} '+:normal iThis is a test' -c x" - editor_container.exec_run( - f"nvim {fut} +':normal iThis is a test' -c x") + editor_container.exec_run(cmd) process = Process.in_container( exe_path='/usr/bin/nvim', - args=f'nvim {fut} +:normal iThis is a test -c x', + args=cmd, name='nvim', container_id=editor_container.id[:12], ) @@ -95,12 +95,12 @@ def test_new_file_ovfs(editor_container, server): def test_open_file_ovfs(editor_container, server): fut = '/container-dir/test.txt' fut_backup = f'{fut}~' + cmd = f"nvim {fut} '+:normal iThis is a test' -c x" container_id = editor_container.id[:12] # We ensure the file exists before editing. editor_container.exec_run(f'touch {fut}') - editor_container.exec_run( - f"nvim {fut} +':normal iThis is a test' -c x") + editor_container.exec_run(cmd) touch = Process.in_container( exe_path='/usr/bin/touch', @@ -110,7 +110,7 @@ def test_open_file_ovfs(editor_container, server): ) nvim = Process.in_container( exe_path='/usr/bin/nvim', - args=f'nvim {fut} +:normal iThis is a test -c x', + args=cmd, name='nvim', container_id=container_id, ) diff --git a/tests/test_editors/test_sed.py b/tests/test_editors/test_sed.py index 39fd5d32..c3f6a2f8 100644 --- a/tests/test_editors/test_sed.py +++ b/tests/test_editors/test_sed.py @@ -5,20 +5,22 @@ def test_sed(vi_container, server): # File Under Test fut = '/mounted/test.txt' + create_cmd = f"sh -c \"echo 'This is a test' > {fut}\"" + sed_cmd = fr'sed -i -e "s/a test/not \\0/" {fut}' container_id = vi_container.id[:12] - vi_container.exec_run(f"sh -c \"echo 'This is a test' > {fut}\"") - vi_container.exec_run(fr"sed -i -e 's/a test/not \0/' {fut}") + vi_container.exec_run(create_cmd) + vi_container.exec_run(sed_cmd) shell = Process.in_container( exe_path='/usr/bin/bash', - args=f"sh -c echo 'This is a test' > {fut}", + args=create_cmd, name='sh', container_id=container_id, ) sed = Process.in_container( exe_path='/usr/bin/sed', - args=fr'sed -i -e s/a test/not \0/ {fut}', + args=sed_cmd, name='sed', container_id=container_id, ) @@ -42,20 +44,22 @@ def test_sed(vi_container, server): def test_sed_ovfs(vi_container, server): # File Under Test fut = '/container-dir/test.txt' + create_cmd = f"sh -c \"echo 'This is a test' > {fut}\"" + sed_cmd = fr'sed -i -e "s/a test/not \\0/" {fut}' container_id = vi_container.id[:12] - vi_container.exec_run(f"sh -c \"echo 'This is a test' > {fut}\"") - vi_container.exec_run(fr"sed -i -e 's/a test/not \0/' {fut}") + vi_container.exec_run(create_cmd) + vi_container.exec_run(sed_cmd) shell = Process.in_container( exe_path='/usr/bin/bash', - args=f"sh -c echo 'This is a test' > {fut}", + args=create_cmd, name='sh', container_id=container_id, ) sed = Process.in_container( exe_path='/usr/bin/sed', - args=fr'sed -i -e s/a test/not \0/ {fut}', + args=sed_cmd, name='sed', container_id=container_id, ) diff --git a/tests/test_editors/test_vi.py b/tests/test_editors/test_vi.py index dd4b3c2b..4301890a 100644 --- a/tests/test_editors/test_vi.py +++ b/tests/test_editors/test_vi.py @@ -8,12 +8,13 @@ def test_new_file(vi_container, server): swx_file = '/mounted/.test.txt.swx' exe = '/usr/bin/vi' - vi_container.exec_run( - f"vi {fut} +':normal iThis is a test' -c x") + cmd = f"{exe} {fut} '+:normal iThis is a test' -c x" + + vi_container.exec_run(cmd) process = Process.in_container( exe_path=exe, - args=f'vi {fut} +:normal iThis is a test -c x', + args=cmd, name='vi', container_id=vi_container.id[:12], ) @@ -44,12 +45,13 @@ def test_new_file_ovfs(vi_container, server): swx_file = '/container-dir/.test.txt.swx' exe = '/usr/bin/vi' - vi_container.exec_run( - f"vi {fut} +':normal iThis is a test' -c x") + cmd = f"{exe} {fut} '+:normal iThis is a test' -c x" + + vi_container.exec_run(cmd) process = Process.in_container( exe_path=exe, - args=f'vi {fut} +:normal iThis is a test -c x', + args=cmd, name='vi', container_id=vi_container.id[:12], ) @@ -91,10 +93,11 @@ def test_open_file(vi_container, server): exe = '/usr/bin/vi' container_id = vi_container.id[:12] + cmd = f"{exe} {fut} '+:normal iThis is a test' -c x" + # We ensure the file exists before editing. vi_container.exec_run(f'touch {fut}') - vi_container.exec_run( - f"vi {fut} +':normal iThis is a test' -c x") + vi_container.exec_run(cmd) touch_process = Process.in_container( exe_path='/usr/bin/touch', @@ -104,7 +107,7 @@ def test_open_file(vi_container, server): ) vi_process = Process.in_container( exe_path=exe, - args=f'vi {fut} +:normal iThis is a test -c x', + args=cmd, name='vi', container_id=container_id, ) @@ -154,10 +157,11 @@ def test_open_file_ovfs(vi_container, server): exe = '/usr/bin/vi' container_id = vi_container.id[:12] + cmd = f"{exe} {fut} '+:normal iThis is a test' -c x" + # We ensure the file exists before editing. vi_container.exec_run(f'touch {fut}') - vi_container.exec_run( - f"vi {fut} +':normal iThis is a test' -c x") + vi_container.exec_run(cmd) touch_process = Process.in_container( exe_path='/usr/bin/touch', @@ -167,7 +171,7 @@ def test_open_file_ovfs(vi_container, server): ) vi_process = Process.in_container( exe_path=exe, - args=f'vi {fut} +:normal iThis is a test -c x', + args=cmd, name='vi', container_id=container_id, ) diff --git a/tests/test_editors/test_vim.py b/tests/test_editors/test_vim.py index 5fa6bf92..56cbb667 100644 --- a/tests/test_editors/test_vim.py +++ b/tests/test_editors/test_vim.py @@ -7,12 +7,13 @@ def test_new_file(editor_container, server): swap_file = '/mounted/.test.txt.swp' swx_file = '/mounted/.test.txt.swx' - editor_container.exec_run( - f"vim {fut} +':normal iThis is a test' -c x") + cmd = f"vim {fut} '+:normal iThis is a test' -c x" + + editor_container.exec_run(cmd) process = Process.in_container( exe_path='/usr/bin/vim', - args=f'vim {fut} +:normal iThis is a test -c x', + args=cmd, name='vim', container_id=editor_container.id[:12], ) @@ -41,12 +42,13 @@ def test_new_file_ovfs(editor_container, server): swap_file = '/container-dir/.test.txt.swp' swx_file = '/container-dir/.test.txt.swx' - editor_container.exec_run( - f"vim {fut} +':normal iThis is a test' -c x") + cmd = f"vim {fut} '+:normal iThis is a test' -c x" + + editor_container.exec_run(cmd) process = Process.in_container( exe_path='/usr/bin/vim', - args=f'vim {fut} +:normal iThis is a test -c x', + args=cmd, name='vim', container_id=editor_container.id[:12], ) @@ -86,10 +88,11 @@ def test_open_file(editor_container, server): vi_test_file = get_vi_test_file('/mounted') container_id = editor_container.id[:12] + cmd = f"vim {fut} '+:normal iThis is a test' -c x" + # We ensure the file exists before editing. editor_container.exec_run(f'touch {fut}') - editor_container.exec_run( - f"vim {fut} +':normal iThis is a test' -c x") + editor_container.exec_run(cmd) touch_process = Process.in_container( exe_path='/usr/bin/touch', @@ -99,7 +102,7 @@ def test_open_file(editor_container, server): ) vi_process = Process.in_container( exe_path='/usr/bin/vim', - args=f'vim {fut} +:normal iThis is a test -c x', + args=cmd, name='vim', container_id=container_id, ) @@ -148,10 +151,11 @@ def test_open_file_ovfs(editor_container, server): vi_test_file = get_vi_test_file('/container-dir') container_id = editor_container.id[:12] + cmd = f"vim {fut} '+:normal iThis is a test' -c x" + # We ensure the file exists before editing. editor_container.exec_run(f'touch {fut}') - editor_container.exec_run( - f"vim {fut} +':normal iThis is a test' -c x") + editor_container.exec_run(cmd) touch_process = Process.in_container( exe_path='/usr/bin/touch', @@ -161,7 +165,7 @@ def test_open_file_ovfs(editor_container, server): ) vi_process = Process.in_container( exe_path='/usr/bin/vim', - args=f'vim {fut} +:normal iThis is a test -c x', + args=cmd, name='vim', container_id=container_id, ) diff --git a/tests/test_file_open.py b/tests/test_file_open.py index 07f7c227..aa1ff508 100644 --- a/tests/test_file_open.py +++ b/tests/test_file_open.py @@ -3,7 +3,7 @@ import pytest -from conftest import join_path_with_filename, path_to_string +from utils import join_path_with_filename, path_to_string, rust_style_quote from event import Event, EventType, Process diff --git a/tests/test_path_chmod.py b/tests/test_path_chmod.py index 0573741a..fde7589f 100644 --- a/tests/test_path_chmod.py +++ b/tests/test_path_chmod.py @@ -3,7 +3,7 @@ import pytest -from conftest import join_path_with_filename, path_to_string +from utils import join_path_with_filename, path_to_string from event import Event, EventType, Process diff --git a/tests/test_path_chown.py b/tests/test_path_chown.py index 189a2af2..a8c75c0b 100644 --- a/tests/test_path_chown.py +++ b/tests/test_path_chown.py @@ -3,7 +3,7 @@ import pytest -from conftest import path_to_string +from utils import path_to_string, rust_style_quote from event import Event, EventType, Process # Tests here have to use a container to do 'chown', @@ -37,14 +37,13 @@ def test_chown(test_container, server, filename): fut = f'/container-dir/{path_to_string(filename)}' # Create the file and chown it - # Use shlex.quote to properly escape special characters for shell - fut_quoted = shlex.quote(fut) - test_container.exec_run(f'touch {fut_quoted}') - test_container.exec_run(f'chown {TEST_UID}:{TEST_GID} {fut_quoted}') + fut_quoted = rust_style_quote(fut) - # The args in the event won't have quotes (shell removes them) - touch_cmd = f'touch {fut}' - chown_cmd = f'chown {TEST_UID}:{TEST_GID} {fut}' + touch_cmd = f'touch {fut_quoted}' + chown_cmd = f'chown {TEST_UID}:{TEST_GID} {fut_quoted}' + + test_container.exec_run(touch_cmd) + test_container.exec_run(chown_cmd) touch = Process.in_container( exe_path='/usr/bin/touch', diff --git a/tests/test_path_rename.py b/tests/test_path_rename.py index 5d6eb42d..1d4f08b7 100644 --- a/tests/test_path_rename.py +++ b/tests/test_path_rename.py @@ -2,7 +2,7 @@ import pytest -from conftest import join_path_with_filename, path_to_string +from utils import join_path_with_filename, path_to_string from event import Event, EventType, Process diff --git a/tests/test_path_unlink.py b/tests/test_path_unlink.py index 36313659..4dff11da 100644 --- a/tests/test_path_unlink.py +++ b/tests/test_path_unlink.py @@ -4,7 +4,7 @@ import docker import pytest -from conftest import join_path_with_filename, path_to_string +from utils import join_path_with_filename, path_to_string from event import Event, EventType, Process diff --git a/tests/utils.py b/tests/utils.py new file mode 100644 index 00000000..b99254d9 --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,73 @@ +import os +import re + + +def join_path_with_filename(directory, filename): + """ + Join a directory path with a filename, handling bytes filenames properly. + + When filename is bytes (e.g., containing invalid UTF-8), converts the + directory to bytes before joining to avoid mixing str and bytes. + + Args: + directory: Directory path (str) + filename: Filename (str or bytes) + + Returns: + Joined path (str or bytes, matching the filename type) + """ + if isinstance(filename, bytes): + return os.path.join(os.fsencode(directory), filename) + else: + return os.path.join(directory, filename) + + +def path_to_string(path): + """ + Convert a filesystem path to string, replacing invalid UTF-8 with U+FFFD. + + This matches the behavior of Rust's String::from_utf8_lossy() used in + the fact codebase. + + Args: + path: Filesystem path (str or bytes) + + Returns: + String representation with invalid UTF-8 replaced by replacement character + """ + if isinstance(path, bytes): + return path.decode('utf-8', errors='replace') + else: + return path + + +def rust_style_quote(s): + """ + Quote a string in the manner of shlex::try_join() rust function. + + Use of python's shlex was considered but has a different quoting + strategy. + + Args: + s: The string to quote + """ + if not s: + return "''" + if re.search(r'[^a-zA-Z0-9_.:/-]', s): + # Try to match the behavior of shlex.try_join() + if '\'' in s and not '"' in s: + return f'"{s}"' + escaped = s.replace("'", "\\'") + return f"'{escaped}'" + return s + + +def rust_style_join(args): + """ + Concatenate arguments after quoting them. Each argument is separated + by a single space. + + Args: + args: The string to quote + """ + return ' '.join(rust_style_quote(arg) for arg in args)