Skip to content

Commit 7948391

Browse files
authored
fix: child processes are not killed after timeout (#232)
* squash * fix * fix * fix * fix * fix * fox
1 parent fdc2959 commit 7948391

File tree

3 files changed

+76
-19
lines changed

3 files changed

+76
-19
lines changed

src/DocumentationGenerator.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,11 @@ function build_documentation(
156156

157157
# wait for all queued processes to finish
158158
for proc in process_queue
159-
wait(proc)
159+
if VERSION < v"1.11"
160+
wait(proc)
161+
else
162+
wait(proc, false)
163+
end
160164
end
161165
end
162166

src/utils/runners.jl

Lines changed: 50 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,19 @@ Runs `command` and pipes all output to `log`. The process will be terminated aft
99
and `verbose` determines whether meta-logs ("process started" etc.) will be printed.
1010
"""
1111
function run_with_timeout(
12-
command; log=stdout, timeout = 40*60, name = "",
13-
wait_time = 1, verbose = true, kill_timeout = 60,
14-
max_timeout = 3*60*60
12+
command;
13+
log = stdout,
14+
timeout = 40 * 60,
15+
name = "",
16+
wait_time = 1,
17+
verbose = true,
18+
kill_timeout = 60,
19+
max_timeout = 3 * 60 * 60,
1520
)
16-
print_interval = 60/wait_time
21+
pushfirst!(command.exec, "setsid")
22+
@info ">> starting" command timeout kill_timeout max_timeout
23+
24+
print_interval = 60 / wait_time
1725
print_in = print_interval
1826

1927
out_io = IOBuffer()
@@ -35,7 +43,7 @@ function run_with_timeout(
3543
io = try
3644
log isa String ? open(log, "w") : log
3745
catch err
38-
@error "Error opening logfile, falling back to stdout" error=err
46+
@error "Error opening logfile, falling back to stdout" error = err
3947
logfallback = true
4048
stdout
4149
end
@@ -47,13 +55,13 @@ function run_with_timeout(
4755
total = time() - job_start
4856
if elapsed > timeout || total > max_timeout
4957
verbose && @info("Terminating $name")
50-
kill(process)
58+
kill_pg(process)
5159
# Handle scenarios where SIGTERM is blocked/ignored/handled by the process
5260
start_time = time()
5361
while process_running(process)
5462
if time() - start_time > kill_timeout
5563
verbose && @info("Killing $name")
56-
kill(process, Base.SIGKILL)
64+
kill_pg(process, true)
5765
end
5866
sleep(5)
5967
end
@@ -79,9 +87,10 @@ function run_with_timeout(
7987
end
8088

8189
verbose && println()
82-
verbose && @info("$name completed in $(round(time() - tstart, digits=1)) seconds")
90+
verbose &&
91+
@info("$name completed in $(round(time() - tstart, digits=1)) seconds")
8392
catch err
84-
@error "Error while running $(name) with timeout." error=err
93+
@error "Error while running $(name) with timeout." error = err
8594
finally
8695
errstr, outstr = readstr_buffer.((out_io, err_io))
8796
isempty(outstr) || println(io, outstr)
@@ -103,3 +112,35 @@ end
103112
"""
    readstr_buffer(x::IOBuffer)

Take everything currently held in `x` and return it as a `String`.
The buffer is emptied by the call (`take!`).
"""
readstr_buffer(x::IOBuffer) = String(take!(x))
115+
116+
"""
    get_pgid(pid::Integer)

Return the process group id of the process `pid` as an `Int32`, as printed by
`ps -o pgid= -p pid`, or `nothing` when it cannot be determined.

`nothing` is returned (and a warning is logged) when running `ps` throws, e.g.
because it exits with a non-zero status, and also when its output cannot be
parsed as an integer.
"""
function get_pgid(pid::Integer)
    out = try
        readchomp(`ps -o pgid= -p $pid`)
    catch ex
        @warn("Failed to fetch pgid", exception = (ex, catch_backtrace()))
        return nothing
    end
    # `tryparse` keeps the "nothing on failure" contract even when `ps` prints
    # something that is not a number; `parse` would throw instead.
    pgid = tryparse(Int32, strip(out))
    isnothing(pgid) && @warn "Unexpected pgid output from ps" pid output = out
    return pgid
end
126+
127+
"""
    kill_pg(p::Base.Process, force = false)

Signal the process group that `p` belongs to, so that processes sharing its
group are signalled together with `p` itself.

With `force = false` a `TERM` signal is requested, with `force = true` a `KILL`
signal. When no process group can be determined for `p`, a warning is logged
and only `p` itself is signalled, honouring `force`. Nothing is done when `p`
is no longer running.
"""
function kill_pg(p::Base.Process, force = false)
    # `getpid` throws for a process that has already exited; there is nothing
    # left to signal in that case.
    process_running(p) || return nothing

    pid = getpid(p)
    if isnothing(get_pgid(pid))
        @warn "No process group found for $p"
        # fall back to the plain per-process signal, still respecting `force`
        return kill(p, force ? Base.SIGKILL : Base.SIGTERM)
    end
    # `kill_process_group` resolves the group from a pid on its own
    return kill_process_group(pid, force)
end
136+
137+
"""
    kill_process_group(pid::Int32, force)

Look up the process group of `pid` with `get_pgid` and signal that whole group
through the external `kill` program: `KILL` when `force` is true, `TERM`
otherwise.

Returns the finished `kill` process object. If no process group can be
resolved, a warning is logged, nothing is signalled and `nothing` is returned.
`run` throws if the `kill` program exits with a non-zero status.
"""
function kill_process_group(pid::Int32, force)
    pgid = get_pgid(pid)
    if isnothing(pgid)
        # Interpolating `nothing` below would ask `kill` to signal "-nothing".
        @warn "No process group found for pid $pid; nothing was signalled"
        return nothing
    end
    signal = force ? "KILL" : "TERM"
    # The negated id after `--` addresses the process group, not a single pid.
    return run(`kill -$signal -- -$pgid`)
end

test/runtests.jl

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ const julia = first(Base.julia_cmd())
1818
"""
1919
proc, _ = DocumentationGenerator.run_with_timeout(`$julia -e $str`, timeout=7)
2020
wait(proc)
21-
@test !success(proc)
2221
@test !isfile(tempfile)
2322
end
2423

@@ -266,12 +265,7 @@ end
266265
using_failed = [false],
267266
)
268267
else
269-
(;
270-
p...,
271-
installs = [false],
272-
success = [false],
273-
doctype = ["missing"],
274-
)
268+
(; p..., installs = [false], success = [false], doctype = ["missing"])
275269
end
276270
end,
277271
(
@@ -298,19 +292,37 @@ end
298292
doctype = ["documenter"],
299293
using_failed = [false],
300294
),
295+
(
296+
name = "NetworkDynamics",
297+
url = "https://github.com/JuliaDynamics/NetworkDynamics.jl.git",
298+
uuid = "22e9dc34-2a0d-11e9-0de0-8588d035468b",
299+
versions = [v"0.9.16"],
300+
server_type = "github",
301+
api_url = "",
302+
installs = [true],
303+
success = [false],
304+
doctype = ["documenter"],
305+
using_failed = [false],
306+
),
301307
]
302308

303309
basepath = @__DIR__
304310
rm(joinpath(basepath, "build"), force = true, recursive = true)
305311

306312
DocumentationGenerator.build_documentation(
307-
packages, basepath = basepath, filter_versions = identity, processes = 6
313+
packages,
314+
basepath = basepath,
315+
filter_versions = identity,
316+
processes = 6,
317+
timeout = 600,
308318
)
309319

310320
build = joinpath(basepath, "build")
311321
@testset "build folder" begin
312322
for pkg in packages
313-
pkgbuild = joinpath(build, DocumentationGenerator.get_docs_dir(pkg.name, pkg.uuid))
323+
!pkg.success[1] && continue
324+
pkgbuild =
325+
joinpath(build, DocumentationGenerator.get_docs_dir(pkg.name, pkg.uuid))
314326
@test isdir(pkgbuild)
315327
@testset "$(pkg.name): $(version)" for (i, version) in enumerate(pkg.versions)
316328
log = joinpath(pkgbuild, "$(version).log")

0 commit comments

Comments
 (0)