diff --git a/.changeset/clean-pans-switch.md b/.changeset/clean-pans-switch.md new file mode 100644 index 00000000..1437c7bc --- /dev/null +++ b/.changeset/clean-pans-switch.md @@ -0,0 +1,5 @@ +--- +"@cloudflare/sandbox": patch +--- + +fix race condition for PID retrieval diff --git a/package-lock.json b/package-lock.json index 0dacb840..3723f9aa 100644 --- a/package-lock.json +++ b/package-lock.json @@ -415,6 +415,7 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -436,6 +437,7 @@ "resolved": "https://registry.npmjs.org/vite/-/vite-6.4.1.tgz", "integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==", "license": "MIT", + "peer": true, "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.4.4", @@ -575,6 +577,7 @@ "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.28.5.tgz", "integrity": "sha512-e7jT4DxYvIDLk1ZHmU/m/mB19rex9sv0c2ftBtjSBv+kVM/902eh0fINUzD7UwLLNR+jU585GxUJ8/EBfAM5fw==", "license": "MIT", + "peer": true, "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/generator": "^7.28.5", @@ -1524,7 +1527,8 @@ "resolved": "https://registry.npmjs.org/@cloudflare/workers-types/-/workers-types-4.20251126.0.tgz", "integrity": "sha512-DSeI1Q7JYmh5/D/tw5eZCjrKY34v69rwj63hHt60nSQW5QLwWCbj/lLtNz9f2EPa+JCACwpLXHgCXfzJ29x66w==", "devOptional": true, - "license": "MIT OR Apache-2.0" + "license": "MIT OR Apache-2.0", + "peer": true }, "node_modules/@cspotcode/source-map-support": { "version": "0.8.1", @@ -2838,6 +2842,7 @@ "integrity": "sha512-/g2d4sW9nUDJOMz3mabVQvOGhVa4e/BN/Um7yca9Bb2XTzPPnfTWHWQg+IsEYO7M3Vx+EXvaM/I2pJWIMun1bg==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@octokit/auth-token": "^4.0.0", "@octokit/graphql": "^7.1.0", @@ -4322,6 +4327,7 @@ "resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.1.tgz", "integrity": "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ==", "license": "MIT", + "peer": true, "dependencies": { "undici-types": "~7.16.0" } @@ -4337,6 +4343,7 @@ "resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.7.tgz", "integrity": "sha512-MWtvHrGZLFttgeEj28VXHxpmwYbor/ATPYbBfSFZEIRK0ecCFLl2Qo55z52Hss+UV9CRN7trSeq1zbgx7YDWWg==", "license": "MIT", + "peer": true, "dependencies": { "csstype": "^3.2.2" } @@ -4346,6 +4353,7 @@ "resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-19.2.3.tgz", "integrity": "sha512-jp2L/eY6fn+KgVVQAOqYItbF0VY/YApe5Mz2F0aykSO8gx31bYCZyvSeYxCHKvzHG5eZjc+zyaS5BrBWya2+kQ==", "license": "MIT", + "peer": true, "peerDependencies": { "@types/react": "^19.2.0" } @@ -4472,6 +4480,7 @@ "integrity": "sha512-oukfKT9Mk41LreEW09vt45f8wx7DordoWUZMYdY/cyAk7w5TWkTRCNZYF7sX7n2wB7jyGAl74OxgwhPgKaqDMQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@vitest/utils": "3.2.4", "pathe": "^2.0.3", @@ -4487,6 +4496,7 @@ "integrity": "sha512-dEYtS7qQP2CjU27QBC5oUOxLE/v5eLkGqPE0ZKEIDGMs4vKWe7IjgLOeauHsR0D5YuuycGRO5oSRXnwnmA78fQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@vitest/pretty-format": "3.2.4", "magic-string": "^0.30.17", @@ -4515,6 +4525,7 @@ "integrity": "sha512-hGISOaP18plkzbWEcP/QvtRW1xDXF2+96HbEX6byqQhAUbiS5oH6/9JwW+QsQCIYON2bI6QZBF+2PvOmrRZ9wA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@vitest/utils": "3.2.4", "fflate": "^0.8.2", @@ -4662,6 +4673,7 @@ "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", "license": "MIT", + "peer": true, "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", @@ -5407,6 +5419,7 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -5700,6 +5713,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "baseline-browser-mapping": "^2.8.25", "caniuse-lite": "^1.0.30001754", @@ -7634,6 +7648,7 @@ "integrity": "sha512-ekilCSN1jwRvIbgeg/57YFh8qQDNbwDb9xT/qu2DAHbFFZUicIl4ygVaAvzveMhMVr3LnpSKTNnwt8PoOfmKhQ==", "devOptional": true, "license": "MIT", + "peer": true, "bin": { "jiti": "lib/jiti-cli.mjs" } @@ -7898,6 +7913,7 @@ "integrity": "sha512-utfs7Pr5uJyyvDETitgsaqSyjCb2qNRAtuqUeWIAKztsOYdcACf2KtARYXg2pSvhkt+9NfoaNY7fxjl6nuMjIQ==", "devOptional": true, "license": "MPL-2.0", + "peer": true, "dependencies": { "detect-libc": "^2.0.3" }, @@ -8183,7 +8199,6 @@ "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", "license": "MIT", - "peer": true, "dependencies": { "js-tokens": "^3.0.0 || ^4.0.0" }, @@ -9752,6 +9767,7 @@ "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.6.2.tgz", "integrity": "sha512-I7AIg5boAr5R0FFtJ6rCfD+LFsWHp81dolrFD8S79U9tb8Az2nGrJncnMSnys+bpQJfRUzqs9hnA81OAA3hCuQ==", "license": "MIT", + "peer": true, "bin": { "prettier": "bin/prettier.cjs" }, @@ -9968,6 +9984,7 @@ "resolved": "https://registry.npmjs.org/react/-/react-19.2.0.tgz", "integrity": "sha512-tmbWg6W31tQLeB5cdIBOicJDJRR2KzXsV7uSK9iNfLWQ5bIZfxuPEHp7M8wiHyHnn0DD1i7w3Zmin0FtkrwoCQ==", "license": "MIT", + "peer": true, "engines": { "node": ">=0.10.0" } @@ -9977,6 +9994,7 @@ "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.0.tgz", "integrity": "sha512-UlbRu4cAiGaIewkPyiRGJk0imDN2T3JjieT6spoL2UeSf5od4n5LB/mQ4ejmxhCFT1tYe8IvaFulzynWovsEFQ==", "license": "MIT", + "peer": true, "dependencies": { "scheduler": "^0.27.0" }, @@ -9988,8 +10006,7 @@ "version": "16.13.1", "resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz", "integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==", - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/react-katex": { "version": "3.1.0", @@ -10363,6 +10380,7 @@ "integrity": "sha512-ZRLgPlS91l4JztLYEZnmMcd3Umcla1hkXJgiEiR4HloRJBBoeaX8qogTu5Jfu36rRMVLndzqYv0h+M5gJAkUfg==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@oxc-project/types": "=0.98.0", "@rolldown/pluginutils": "1.0.0-beta.51" @@ -11178,6 +11196,7 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -11377,6 +11396,7 @@ "integrity": "sha512-ytQKuwgmrrkDTFP4LjR0ToE2nqgy886GpvRSpU0JAnrdBYppuY5rLkRUYPU1yCryb24SsKBTL/hlDQAEFVwtZg==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "esbuild": "~0.25.0", "get-tsconfig": "^4.7.5" @@ -11536,6 +11556,7 @@ "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -11605,6 +11626,7 @@ "resolved": "https://registry.npmjs.org/unenv/-/unenv-2.0.0-rc.24.tgz", "integrity": "sha512-i7qRCmY42zmCwnYlh9H2SvLEypEFGye5iRmEMKjcGi7zk9UquigRjFtTLz0TYqr0ZGLZhaMHl/foy1bZR+Cwlw==", "license": "MIT", + "peer": true, "dependencies": { "pathe": "^2.0.3" } @@ -12046,6 +12068,7 @@ "resolved": "https://registry.npmjs.org/vite/-/vite-7.2.4.tgz", "integrity": "sha512-NL8jTlbo0Tn4dUEXEsUg8KeyG/Lkmc4Fnzb8JXN/Ykm9G4HNImjtABMJgkQoVjOBN/j2WAwDTRytdqJbZsah7w==", "license": "MIT", + "peer": true, "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.5.0", @@ -12160,6 +12183,7 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -12192,6 +12216,7 @@ "integrity": "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@types/chai": "^5.2.2", "@vitest/expect": "3.2.4", @@ -12617,6 +12642,7 @@ "integrity": "sha512-Om5ns0Lyx/LKtYI04IV0bjIrkBgoFNg0p6urzr2asekJlfP18RqFzyqMFZKf0i9Gnjtz/JfAS/Ol6tjCe5JJsQ==", "hasInstallScript": true, "license": "Apache-2.0", + "peer": true, "bin": { "workerd": "bin/workerd" }, @@ -13380,6 +13406,7 @@ "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } diff --git a/packages/sandbox-container/src/services/process-service.ts b/packages/sandbox-container/src/services/process-service.ts index 6aa4cac0..38038af1 100644 --- a/packages/sandbox-container/src/services/process-service.ts +++ b/packages/sandbox-container/src/services/process-service.ts @@ -151,6 +151,7 @@ export class ProcessService { async (event) => { // Route events to process record listeners if (event.type === 'start' && event.pid !== undefined) { + processRecord.pid = event.pid; await this.store.update(processRecord.id, { pid: event.pid }); } else if (event.type === 'stdout' && event.data) { processRecord.stdout += event.data; diff --git a/packages/sandbox-container/src/session.ts b/packages/sandbox-container/src/session.ts index dc385ae1..65f824b2 100644 --- a/packages/sandbox-container/src/session.ts +++ b/packages/sandbox-container/src/session.ts @@ -25,7 +25,7 @@ import { randomUUID } from 'node:crypto'; import { watch } from 'node:fs'; -import { mkdir, rm, stat } from 'node:fs/promises'; +import { mkdir, open, rm, stat } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { basename, dirname, join } from 'node:path'; import type { ExecEvent, Logger } from '@repo/shared'; @@ -335,6 +335,7 @@ export class Session { const logFile = join(this.sessionDir!, `${commandId}.log`); const exitCodeFile = join(this.sessionDir!, `${commandId}.exit`); const pidFile = join(this.sessionDir!, `${commandId}.pid`); + const pidPipe = join(this.sessionDir!, `${commandId}.pid.pipe`); const labelersDoneFile = join( this.sessionDir!, `${commandId}.labelers.done` @@ -351,6 +352,10 @@ export class Session { // Track command this.trackCommand(commandId, pidFile, logFile, exitCodeFile); + // Create PID notification FIFO before sending command + // This ensures synchronization: shell writes PID, we read it (blocking) + await this.createPidPipe(pidPipe); + // Build FIFO script for BACKGROUND execution // Command runs concurrently, shell continues immediately const bashScript = this.buildFIFOScript( @@ -360,7 +365,8 @@ export class Session { exitCodeFile, options?.cwd, true, - options?.env + options?.env, + pidPipe ); if (this.shell!.stdin && typeof this.shell!.stdin !== 'number') { @@ -369,14 +375,14 @@ export class Session { throw new Error('Shell stdin is not available'); } - // Wait for PID file to be created (bash script writes it after starting command) - const pid = await this.waitForPidFile(pidFile); + // Wait for PID via FIFO (blocking read - guarantees synchronization) + const pid = await this.waitForPidViaPipe(pidPipe, pidFile); if (pid === undefined) { - this.logger.warn('PID file not created within timeout', { + this.logger.warn('PID not received within timeout', { sessionId: this.id, commandId, - pidFile + pidPipe }); } @@ -690,6 +696,7 @@ export class Session { * * @param isBackground - If true, command runs in background (for execStream/startProcess) * If false, command runs in foreground (for exec) - state persists! + * @param pidPipe - Optional path to PID notification FIFO (for reliable PID synchronization) */ private buildFIFOScript( command: string, @@ -698,7 +705,8 @@ export class Session { exitCodeFile: string, cwd?: string, isBackground = false, - env?: Record + env?: Record, + pidPipe?: string ): string { // Create unique FIFO names to prevent collisions const stdoutPipe = join(this.sessionDir!, `${cmdId}.stdout.pipe`); @@ -714,6 +722,7 @@ export class Session { const safeSessionDir = this.escapeShellPath(this.sessionDir!); const safePidFile = this.escapeShellPath(pidFile); const safeLabelersDoneFile = this.escapeShellPath(labelersDoneFile); + const safePidPipe = pidPipe ? this.escapeShellPath(pidPipe) : null; const indentLines = (input: string, spaces: number) => { const prefix = ' '.repeat(spaces); @@ -794,6 +803,10 @@ export class Session { script += ` # Write PID for process killing\n`; script += ` echo "$CMD_PID" > ${safePidFile}.tmp\n`; script += ` mv ${safePidFile}.tmp ${safePidFile}\n`; + if (safePidPipe) { + script += ` # Notify PID via FIFO (unblocks waitForPidViaPipe)\n`; + script += ` echo "$CMD_PID" > ${safePidPipe}\n`; + } script += ` # Background monitor: waits for labelers to finish (after FIFO EOF)\n`; script += ` # and then removes the FIFOs. PID file is cleaned up by TypeScript.\n`; script += ` (\n`; @@ -806,6 +819,10 @@ export class Session { script += ` else\n`; script += ` printf '\\x02\\x02\\x02%s\\n' "Failed to change directory to ${safeCwd}" >> "$log"\n`; script += ` EXIT_CODE=1\n`; + if (safePidPipe) { + script += ` # Notify error via FIFO (unblocks waitForPidViaPipe with empty/error)\n`; + script += ` echo "" > ${safePidPipe}\n`; + } script += ` fi\n`; } else { script += ` # Execute command in BACKGROUND (runs in subshell, enables concurrency)\n`; @@ -818,6 +835,10 @@ export class Session { script += ` # Write PID for process killing\n`; script += ` echo "$CMD_PID" > ${safePidFile}.tmp\n`; script += ` mv ${safePidFile}.tmp ${safePidFile}\n`; + if (safePidPipe) { + script += ` # Notify PID via FIFO (unblocks waitForPidViaPipe)\n`; + script += ` echo "$CMD_PID" > ${safePidPipe}\n`; + } script += ` # Background monitor: waits for labelers to finish (after FIFO EOF)\n`; script += ` # and then removes the FIFOs. PID file is cleaned up by TypeScript.\n`; script += ` (\n`; @@ -1111,6 +1132,105 @@ export class Session { return undefined; } + /** + * Create a FIFO (named pipe) for PID notification + * This must be created BEFORE sending the command to the shell + */ + private async createPidPipe(pidPipe: string): Promise { + // Remove any existing pipe first + try { + await rm(pidPipe, { force: true }); + } catch { + // Ignore errors + } + + // Create the FIFO using mkfifo command + const result = Bun.spawnSync(['mkfifo', pidPipe]); + if (result.exitCode !== 0) { + throw new Error(`Failed to create PID pipe: ${result.stderr.toString()}`); + } + } + + /** + * Wait for PID via FIFO with fallback to file polling + * + * Uses a FIFO for reliable synchronization: the shell writes the PID to the pipe, + * and we do a blocking read. This eliminates race conditions from file polling. + * Falls back to file polling if FIFO read fails (e.g., pipe broken). + * + * @param pidPipe - Path to the PID notification FIFO + * @param pidFile - Path to the PID file (fallback) + * @param timeoutMs - Timeout for waiting + * @returns The PID or undefined if not available within timeout + */ + private async waitForPidViaPipe( + pidPipe: string, + pidFile: string, + timeoutMs: number = 5000 + ): Promise { + try { + // Read from FIFO with timeout + // Opening a FIFO for reading blocks until a writer opens it + const pid = await Promise.race([ + this.readPidFromPipe(pidPipe), + Bun.sleep(timeoutMs).then(() => undefined) + ]); + + if (pid !== undefined) { + return pid; + } + + // Timeout reached, fall back to file polling + this.logger.warn( + 'PID pipe read timed out, falling back to file polling', + { + pidPipe, + pidFile, + timeoutMs + } + ); + } catch (error) { + // FIFO read failed, fall back to file polling + this.logger.warn('PID pipe read failed, falling back to file polling', { + pidPipe, + pidFile, + error: error instanceof Error ? error.message : String(error) + }); + } finally { + // Clean up the pipe + try { + await rm(pidPipe, { force: true }); + } catch { + // Ignore cleanup errors + } + } + + // Fallback: poll the PID file (less reliable but works) + return this.waitForPidFile(pidFile, 1000); + } + + /** + * Read PID from a FIFO (named pipe) + * This blocks until the shell writes the PID + * + * Uses Node.js fs.open which properly handles FIFOs - the open() call + * blocks until a writer opens the pipe, then we read the content. + */ + private async readPidFromPipe(pidPipe: string): Promise { + // Open the FIFO for reading - this blocks until a writer opens it + const fd = await open(pidPipe, 'r'); + try { + // Read content from the FIFO + const buffer = Buffer.alloc(64); + const { bytesRead } = await fd.read(buffer, 0, 64, null); + const content = buffer.toString('utf8', 0, bytesRead).trim(); + const pid = parseInt(content, 10); + return Number.isNaN(pid) ? undefined : pid; + } finally { + await fd.close(); + } + } + /** * Escape shell path for safe usage in bash scripts */ diff --git a/tests/e2e/comprehensive-workflow.test.ts b/tests/e2e/comprehensive-workflow.test.ts index 34728c7a..764f9ab3 100644 --- a/tests/e2e/comprehensive-workflow.test.ts +++ b/tests/e2e/comprehensive-workflow.test.ts @@ -298,8 +298,20 @@ const interval = setInterval(() => { expect(processData.id).toBeTruthy(); const processId = processData.id; - // Wait for process to complete - await new Promise((resolve) => setTimeout(resolve, 3000)); + // Wait for process to complete using waitForLog instead of fixed sleep + // This is more reliable under load as it waits for actual output + const waitResponse = await fetch( + `${workerUrl}/api/process/${processId}/waitForLog`, + { + method: 'POST', + headers, + body: JSON.stringify({ + pattern: 'Done', + timeout: 10000 + }) + } + ); + expect(waitResponse.status).toBe(200); // Get process logs const logsResponse = await fetch(