From 5233b8825e15deadf8e867341cf2b6b67339ad49 Mon Sep 17 00:00:00 2001 From: Naresh Date: Mon, 3 Nov 2025 13:24:58 +0000 Subject: [PATCH 1/3] Fix foreground commands blocking on background processes Foreground commands (exec, listFiles) would hang until background processes completed. This was caused by wait without arguments in the foreground pattern, which waits for ALL shell child processes. Bash waits for process substitutions automatically, so the explicit wait was unnecessary. Removing it fixes the hang. --- docs/SESSION_EXECUTION.md | 4 +- packages/sandbox-container/src/session.ts | 2 - tests/e2e/process-lifecycle-workflow.test.ts | 55 ++++++++++++++++++++ 3 files changed, 57 insertions(+), 4 deletions(-) diff --git a/docs/SESSION_EXECUTION.md b/docs/SESSION_EXECUTION.md index 3a4d44f6..41308aca 100644 --- a/docs/SESSION_EXECUTION.md +++ b/docs/SESSION_EXECUTION.md @@ -28,8 +28,6 @@ Pseudo: > >(while read; printf "\x01\x01\x01%s\n" "$REPLY" >> "$log") \ 2> >(while read; printf "\x02\x02\x02%s\n" "$REPLY" >> "$log") EXIT_CODE=$? -# Ensure consumers have drained -wait 2>/dev/null # Atomically publish exit code echo "$EXIT_CODE" > "$exit.tmp" && mv "$exit.tmp" "$exit" ``` @@ -97,3 +95,5 @@ mkfifo "$sp" "$ep" - Why not tee? Tee doesn’t split stdout/stderr into separate channels with stable ordering without extra plumbing; our prefixes are simple and explicit. - Is process substitution portable? - It is supported by bash (we spawn bash with `--norc`). The container environment supports it; if portability constraints change, we can revisit. +- Why doesn't the foreground pattern need an explicit `wait`? + Bash waits for process substitutions automatically before returning control. Calling `wait` without arguments is dangerous because it waits for ALL child processes of the shell, not just the current command's process substitutions. In a session with background processes from `execStream`/`startProcess`, this would cause foreground commands to block on unrelated background jobs (e.g., `listFiles()` would hang for 60 seconds after `startProcess('sleep 60')`). diff --git a/packages/sandbox-container/src/session.ts b/packages/sandbox-container/src/session.ts index 2406c570..eda45726 100644 --- a/packages/sandbox-container/src/session.ts +++ b/packages/sandbox-container/src/session.ts @@ -746,8 +746,6 @@ export class Session { script += ` EXIT_CODE=$?\n`; } - // Ensure process-substitution consumers complete before writing exit code - script += ` wait 2>/dev/null\n`; script += ` \n`; script += ` # Write exit code\n`; script += ` echo "$EXIT_CODE" > ${safeExitCodeFile}.tmp\n`; diff --git a/tests/e2e/process-lifecycle-workflow.test.ts b/tests/e2e/process-lifecycle-workflow.test.ts index ef6730e5..d15f2cf4 100644 --- a/tests/e2e/process-lifecycle-workflow.test.ts +++ b/tests/e2e/process-lifecycle-workflow.test.ts @@ -186,6 +186,61 @@ describe('Process Lifecycle Workflow', () => { }); }, 90000); + test('should not block foreground operations when background processes are running', async () => { + const sandboxId = createSandboxId(); + const headers = createTestHeaders(sandboxId); + + // Start a long-running background process + const startResponse = await vi.waitFor( + async () => + fetchWithStartup(`${workerUrl}/api/process/start`, { + method: 'POST', + headers, + body: JSON.stringify({ + command: 'sleep 60' + }) + }), + { timeout: 90000, interval: 2000 } + ); + + const startData = await startResponse.json(); + const processId = startData.id; + + // Immediately run a foreground command - should complete quickly + const execStart = Date.now(); + const execResponse = await fetch(`${workerUrl}/api/execute`, { + method: 'POST', + headers, + body: JSON.stringify({ + command: 'echo "test"' + }) + }); + const execDuration = Date.now() - execStart; + + expect(execResponse.status).toBe(200); + expect(execDuration).toBeLessThan(2000); // Should complete in <2s, not wait for sleep + + // Test listFiles as well - it uses the same foreground execution path + const listStart = Date.now(); + const listResponse = await fetch(`${workerUrl}/api/list-files`, { + method: 'POST', + headers, + body: JSON.stringify({ + path: '/workspace' + }) + }); + const listDuration = Date.now() - listStart; + + expect(listResponse.status).toBe(200); + expect(listDuration).toBeLessThan(2000); // Should complete quickly + + // Cleanup + await fetch(`${workerUrl}/api/process/${processId}`, { + method: 'DELETE', + headers + }); + }, 90000); + test('should get process logs after execution', async () => { const sandboxId = createSandboxId(); const headers = createTestHeaders(sandboxId); From 71c4a34e6a72c27524755e1ee6361e2c424ee179 Mon Sep 17 00:00:00 2001 From: Naresh Date: Mon, 3 Nov 2025 13:28:53 +0000 Subject: [PATCH 2/3] Add changeset --- .changeset/neat-drinks-own.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/neat-drinks-own.md diff --git a/.changeset/neat-drinks-own.md b/.changeset/neat-drinks-own.md new file mode 100644 index 00000000..8dc4e127 --- /dev/null +++ b/.changeset/neat-drinks-own.md @@ -0,0 +1,5 @@ +--- +"@cloudflare/sandbox": patch +--- + +Fix foreground commands blocking on background processes From 6aee12d7e83a233c96c9ec9176ca8ae8ccc9d453 Mon Sep 17 00:00:00 2001 From: Naresh Date: Mon, 3 Nov 2025 15:32:28 +0000 Subject: [PATCH 3/3] Fix race condition in foreground exec pattern Process substitutions write asynchronously - bash returns when substitution processes close, but writes may still be buffered. With large output (base64-encoded files), the log could be incomplete when read, causing flaky CI failures. Replace process substitutions with direct file redirects. Bash waits for redirects to complete, ensuring logs are fully written before exit codes are published. --- docs/SESSION_EXECUTION.md | 20 ++++++++++---------- packages/sandbox-container/src/session.ts | 19 ++++++++++++------- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/docs/SESSION_EXECUTION.md b/docs/SESSION_EXECUTION.md index 41308aca..06bdeb22 100644 --- a/docs/SESSION_EXECUTION.md +++ b/docs/SESSION_EXECUTION.md @@ -14,20 +14,20 @@ This document explains how the container session executes commands reliably whil ### Foreground (`exec`) - Runs in the main bash shell so state persists across commands. -- Uses bash process substitution to prefix stdout/stderr inline and append to the per-command log file. -- After the command returns, we `wait` to ensure process-substitution consumers finish writing to the log before we write the exit code file. -- Why process substitution (not FIFOs): - - Foreground previously used FIFOs + background labelers, which can race on silent commands because FIFO open/close ordering depends on cross-process scheduling. - - Process substitution keeps execution local to the main shell and avoids FIFO semantics entirely. +- Writes stdout/stderr to temporary files, then prefixes and merges them into the log. +- Bash waits for file redirects to complete before continuing, ensuring the log is fully written before the exit code is published. +- This avoids race conditions from process substitution buffering where log reads could happen before writes complete. Pseudo: ``` # Foreground -{ command; } \ - > >(while read; printf "\x01\x01\x01%s\n" "$REPLY" >> "$log") \ - 2> >(while read; printf "\x02\x02\x02%s\n" "$REPLY" >> "$log") +{ command; } > "$log.stdout" 2> "$log.stderr" EXIT_CODE=$? +# Prefix and merge into main log +(while read line; do printf "\x01\x01\x01%s\n" "$line"; done < "$log.stdout") >> "$log" +(while read line; do printf "\x02\x02\x02%s\n" "$line"; done < "$log.stderr") >> "$log" +rm -f "$log.stdout" "$log.stderr" # Atomically publish exit code echo "$EXIT_CODE" > "$exit.tmp" && mv "$exit.tmp" "$exit" ``` @@ -95,5 +95,5 @@ mkfifo "$sp" "$ep" - Why not tee? Tee doesn’t split stdout/stderr into separate channels with stable ordering without extra plumbing; our prefixes are simple and explicit. - Is process substitution portable? - It is supported by bash (we spawn bash with `--norc`). The container environment supports it; if portability constraints change, we can revisit. -- Why doesn't the foreground pattern need an explicit `wait`? - Bash waits for process substitutions automatically before returning control. Calling `wait` without arguments is dangerous because it waits for ALL child processes of the shell, not just the current command's process substitutions. In a session with background processes from `execStream`/`startProcess`, this would cause foreground commands to block on unrelated background jobs (e.g., `listFiles()` would hang for 60 seconds after `startProcess('sleep 60')`). +- Why use temp files instead of process substitution for foreground? + Process substitutions run asynchronously - bash returns when the substitution processes close, but their writes to the log file may still be buffered. With large output (e.g., base64-encoded files), the log file can be incomplete when we try to read it. Using direct file redirects ensures bash waits for all writes to complete before continuing, eliminating this race condition. diff --git a/packages/sandbox-container/src/session.ts b/packages/sandbox-container/src/session.ts index eda45726..ed9dfe26 100644 --- a/packages/sandbox-container/src/session.ts +++ b/packages/sandbox-container/src/session.ts @@ -722,17 +722,17 @@ export class Session { // FOREGROUND PATTERN (for exec) // Command runs in main shell, state persists! - // FOREGROUND: Avoid FIFOs to eliminate race conditions. - // Use bash process substitution to prefix stdout/stderr while keeping - // execution in the main shell so session state persists across commands. + // FOREGROUND: Write stdout/stderr to temp files, then prefix and merge. + // This ensures bash waits for all writes to complete before continuing, + // avoiding race conditions when reading the log file. if (cwd) { const safeCwd = this.escapeShellPath(cwd); script += ` # Save and change directory\n`; script += ` PREV_DIR=$(pwd)\n`; script += ` if cd ${safeCwd}; then\n`; - script += ` # Execute command with prefixed streaming via process substitution\n`; - script += ` { ${command}; } < /dev/null > >(while IFS= read -r line || [[ -n "$line" ]]; do printf '\\x01\\x01\\x01%s\\n' "$line"; done >> "$log") 2> >(while IFS= read -r line || [[ -n "$line" ]]; do printf '\\x02\\x02\\x02%s\\n' "$line"; done >> "$log")\n`; + script += ` # Execute command, redirect to temp files\n`; + script += ` { ${command}; } < /dev/null > "$log.stdout" 2> "$log.stderr"\n`; script += ` EXIT_CODE=$?\n`; script += ` # Restore directory\n`; script += ` cd "$PREV_DIR"\n`; @@ -741,11 +741,16 @@ export class Session { script += ` EXIT_CODE=1\n`; script += ` fi\n`; } else { - script += ` # Execute command with prefixed streaming via process substitution\n`; - script += ` { ${command}; } < /dev/null > >(while IFS= read -r line || [[ -n "$line" ]]; do printf '\\x01\\x01\\x01%s\\n' "$line"; done >> "$log") 2> >(while IFS= read -r line || [[ -n "$line" ]]; do printf '\\x02\\x02\\x02%s\\n' "$line"; done >> "$log")\n`; + script += ` # Execute command, redirect to temp files\n`; + script += ` { ${command}; } < /dev/null > "$log.stdout" 2> "$log.stderr"\n`; script += ` EXIT_CODE=$?\n`; } + script += ` \n`; + script += ` # Prefix and merge stdout/stderr into main log\n`; + script += ` (while IFS= read -r line || [[ -n "$line" ]]; do printf '\\x01\\x01\\x01%s\\n' "$line"; done < "$log.stdout" >> "$log") 2>/dev/null\n`; + script += ` (while IFS= read -r line || [[ -n "$line" ]]; do printf '\\x02\\x02\\x02%s\\n' "$line"; done < "$log.stderr" >> "$log") 2>/dev/null\n`; + script += ` rm -f "$log.stdout" "$log.stderr"\n`; script += ` \n`; script += ` # Write exit code\n`; script += ` echo "$EXIT_CODE" > ${safeExitCodeFile}.tmp\n`;