Skip to content

Commit e39bb38

Browse files
authored
test(sandbox): fix flaky arm64 procfs binary_path tests (#881)
1 parent ae7e901 commit e39bb38

File tree

1 file changed

+61
-29
lines changed

1 file changed

+61
-29
lines changed

crates/openshell-sandbox/src/procfs.rs

Lines changed: 61 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,52 @@ mod tests {
399399
use super::*;
400400
use std::io::Write;
401401

402+
/// Block until `/proc/<pid>/exe` points at `target`, panicking after 2 s.
///
/// `Command::spawn` returns once the child is scheduled, not once it has
/// completed `exec()`; on contended runners the readlink can still show the
/// parent (test harness) binary for a brief window, so callers poll here
/// before inspecting or unlinking the executable.
///
/// The link is compared byte-wise so non-UTF-8 paths work. It must equal
/// `target` exactly, optionally followed by the kernel's `" (deleted)"`
/// suffix for unlinked executables; a longer sibling path that merely shares
/// the prefix (e.g. `sleep2` vs `sleep`) is not accepted.
///
/// # Panics
///
/// Panics if the link has not matched within the 2 s deadline. Read errors
/// on the `/proc` entry are treated as "not yet" and retried until then.
#[cfg(target_os = "linux")]
fn wait_for_child_exec(pid: i32, target: &std::path::Path) {
    use std::os::unix::ffi::OsStrExt as _;

    const TIMEOUT: std::time::Duration = std::time::Duration::from_secs(2);
    const POLL_INTERVAL: std::time::Duration = std::time::Duration::from_millis(10);

    let target_bytes = target.as_os_str().as_bytes();
    // The procfs path never changes between polls; build it once.
    let proc_exe = format!("/proc/{pid}/exe");
    let deadline = std::time::Instant::now() + TIMEOUT;
    loop {
        let exec_done = std::fs::read_link(&proc_exe)
            .is_ok_and(|link| exe_link_matches(link.as_os_str().as_bytes(), target_bytes));
        if exec_done {
            return;
        }
        assert!(
            std::time::Instant::now() < deadline,
            "child pid {pid} did not exec into {target:?} within 2s"
        );
        std::thread::sleep(POLL_INTERVAL);
    }
}

/// Return `true` when a raw `/proc/<pid>/exe` link value names `target`:
/// either exactly, or with the kernel's `" (deleted)"` suffix appended.
#[cfg(target_os = "linux")]
fn exe_link_matches(link: &[u8], target: &[u8]) -> bool {
    const DELETED_SUFFIX: &[u8] = b" (deleted)";

    link.strip_prefix(target)
        .is_some_and(|rest| rest.is_empty() || rest == DELETED_SUFFIX)
}
425+
426+
/// Spawn `cmd`, retrying while the kernel reports `ETXTBSY`.
///
/// The kernel rejects `execve` when `inode->i_writecount > 0`, and the
/// release of that counter after the writer fd is closed isn't synchronous
/// with `close(2)` under contention — so the very-next-instruction `execve`
/// can still race it. Up to 20 retries are made, 50 ms apart.
///
/// # Panics
///
/// Panics if the `ETXTBSY` retry budget is exhausted, or immediately on any
/// other spawn error. The two cases use distinct messages so an unrelated
/// failure (missing binary, permissions, ...) is not reported as an
/// `ETXTBSY` problem.
#[cfg(target_os = "linux")]
fn spawn_retrying_on_etxtbsy(cmd: &mut std::process::Command) -> std::process::Child {
    const MAX_RETRIES: u32 = 20;
    const RETRY_DELAY: std::time::Duration = std::time::Duration::from_millis(50);

    let mut retries: u32 = 0;
    loop {
        match cmd.spawn() {
            Ok(child) => return child,
            Err(err) if err.kind() == std::io::ErrorKind::ExecutableFileBusy => {
                // Still busy: give up only once the whole budget is spent.
                assert!(
                    retries < MAX_RETRIES,
                    "spawn failed after {retries} ETXTBSY retries: {err}"
                );
                retries += 1;
                std::thread::sleep(RETRY_DELAY);
            }
            // Anything else is not transient; surface it right away.
            Err(err) => panic!("spawn failed: {err}"),
        }
    }
}
447+
402448
#[test]
403449
fn file_sha256_computes_correct_hash() {
404450
let mut tmp = tempfile::NamedTempFile::new().unwrap();
@@ -457,11 +503,11 @@ mod tests {
457503
// child is still running. The child keeps the exec mapping via
458504
// `/proc/<pid>/exe`, but readlink will now return the tainted
459505
// "<path> (deleted)" string.
460-
let mut child = std::process::Command::new(&exe_path)
461-
.arg("5")
462-
.spawn()
463-
.unwrap();
506+
let mut cmd = std::process::Command::new(&exe_path);
507+
cmd.arg("5");
508+
let mut child = spawn_retrying_on_etxtbsy(&mut cmd);
464509
let pid: i32 = child.id().cast_signed();
510+
wait_for_child_exec(pid, &exe_path);
465511
std::fs::remove_file(&exe_path).unwrap();
466512

467513
// Sanity check: the raw readlink should contain " (deleted)".
@@ -507,11 +553,11 @@ mod tests {
507553
std::fs::copy("/bin/sleep", &exe_path).unwrap();
508554
std::fs::set_permissions(&exe_path, std::fs::Permissions::from_mode(0o755)).unwrap();
509555

510-
let mut child = std::process::Command::new(&exe_path)
511-
.arg("5")
512-
.spawn()
513-
.unwrap();
556+
let mut cmd = std::process::Command::new(&exe_path);
557+
cmd.arg("5");
558+
let mut child = spawn_retrying_on_etxtbsy(&mut cmd);
514559
let pid: i32 = child.id().cast_signed();
560+
wait_for_child_exec(pid, &exe_path);
515561

516562
// File is still linked — binary_path must return the path unchanged,
517563
// suffix and all.
@@ -537,9 +583,8 @@ mod tests {
537583
#[test]
538584
fn binary_path_strips_suffix_for_non_utf8_filename() {
539585
use std::ffi::OsString;
540-
use std::io::Write;
541586
use std::os::unix::ffi::{OsStrExt, OsStringExt};
542-
use std::os::unix::fs::{OpenOptionsExt, PermissionsExt};
587+
use std::os::unix::fs::PermissionsExt;
543588

544589
let tmp = tempfile::TempDir::new().unwrap();
545590
// 0xFF is not valid UTF-8. Build the filename on raw bytes.
@@ -548,27 +593,14 @@ mod tests {
548593
raw_name.extend_from_slice(b".bin");
549594
let exe_path = tmp.path().join(OsString::from_vec(raw_name));
550595

551-
// Write bytes explicitly (instead of `std::fs::copy`) with an
552-
// explicit `sync_all()` + scope drop so the write fd is fully closed
553-
// before we `exec()` the file. Otherwise concurrent tests can race
554-
// the kernel into returning ETXTBSY on spawn.
555-
let bytes = std::fs::read("/bin/sleep").expect("read /bin/sleep");
556-
{
557-
let mut f = std::fs::OpenOptions::new()
558-
.write(true)
559-
.create_new(true)
560-
.mode(0o755)
561-
.open(&exe_path)
562-
.expect("create non-UTF-8 target file");
563-
f.write_all(&bytes).expect("write bytes");
564-
f.sync_all().expect("sync_all before exec");
565-
}
596+
std::fs::copy("/bin/sleep", &exe_path).unwrap();
597+
std::fs::set_permissions(&exe_path, std::fs::Permissions::from_mode(0o755)).unwrap();
566598

567-
let mut child = std::process::Command::new(&exe_path)
568-
.arg("5")
569-
.spawn()
570-
.unwrap();
599+
let mut cmd = std::process::Command::new(&exe_path);
600+
cmd.arg("5");
601+
let mut child = spawn_retrying_on_etxtbsy(&mut cmd);
571602
let pid: i32 = child.id().cast_signed();
603+
wait_for_child_exec(pid, &exe_path);
572604
std::fs::remove_file(&exe_path).unwrap();
573605

574606
// Sanity: raw readlink ends with " (deleted)" and is not valid UTF-8.

0 commit comments

Comments
 (0)