@@ -9,11 +9,19 @@ Runs `command` and pipes all output to `log`. The process will be terminated aft
99and `verbose` determines whether meta-logs ("process started" etc.) will be printed.
1010"""
1111function run_with_timeout (
12- command; log= stdout , timeout = 40 * 60 , name = " " ,
13- wait_time = 1 , verbose = true , kill_timeout = 60 ,
14- max_timeout = 3 * 60 * 60
12+ command;
13+ log = stdout ,
14+ timeout = 40 * 60 ,
15+ name = " " ,
16+ wait_time = 1 ,
17+ verbose = true ,
18+ kill_timeout = 60 ,
19+ max_timeout = 3 * 60 * 60 ,
1520 )
16- print_interval = 60 / wait_time
21+ pushfirst! (command. exec, " setsid" )
22+ @info " >> starting" command timeout kill_timeout max_timeout
23+
24+ print_interval = 60 / wait_time
1725 print_in = print_interval
1826
1927 out_io = IOBuffer ()
@@ -35,7 +43,7 @@ function run_with_timeout(
3543 io = try
3644 log isa String ? open (log, " w" ) : log
3745 catch err
38- @error " Error opening logfile, falling back to stdout" error= err
46+ @error " Error opening logfile, falling back to stdout" error = err
3947 logfallback = true
4048 stdout
4149 end
@@ -47,13 +55,13 @@ function run_with_timeout(
4755 total = time () - job_start
4856 if elapsed > timeout || total > max_timeout
4957 verbose && @info (" Terminating $name " )
50- kill (process)
58+ kill_pg (process)
5159 # Handle scenarios where SIGTERM is blocked/ignored/handled by the process
5260 start_time = time ()
5361 while process_running (process)
5462 if time () - start_time > kill_timeout
5563 verbose && @info (" Killing $name " )
56- kill (process, Base . SIGKILL )
64+ kill_pg (process, true )
5765 end
5866 sleep (5 )
5967 end
@@ -79,9 +87,10 @@ function run_with_timeout(
7987 end
8088
8189 verbose && println ()
82- verbose && @info (" $name completed in $(round (time () - tstart, digits= 1 )) seconds" )
90+ verbose &&
91+ @info (" $name completed in $(round (time () - tstart, digits= 1 )) seconds" )
8392 catch err
84- @error " Error while running $(name) with timeout." error= err
93+ @error " Error while running $(name) with timeout." error = err
8594 finally
8695 errstr, outstr = readstr_buffer .((out_io, err_io))
8796 isempty (outstr) || println (io, outstr)
"""
    readstr_buffer(x::IOBuffer)

Take the current contents out of `x` (via `take!`) and return them as a `String`.
"""
readstr_buffer(x::IOBuffer) = String(take!(x))
115+
"""
    get_pgid(pid::Integer)

Return the process group ID of the process `pid` as an `Int32`, or `nothing` if it
cannot be determined.

The ID is read from the output of `ps -o pgid= -p <pid>`. If running `ps` fails, or if
its output cannot be parsed as an integer, a warning is logged and `nothing` is
returned instead of throwing.
"""
function get_pgid(pid::Integer)
    out = try
        readchomp(`ps -o pgid= -p $pid`)
    catch ex
        @warn("Failed to fetch pgid", exception = (ex, catch_backtrace()))
        return nothing
    end
    # `tryparse` rather than `parse`: empty or unexpected `ps` output must not throw.
    pgid = tryparse(Int32, strip(out))
    isnothing(pgid) && @warn "Unexpected output while fetching pgid" pid output = out
    return pgid
end
126+
"""
    kill_pg(p::Base.Process, force = false)

Signal the process group that `p` belongs to instead of only `p` itself.

The group ID is looked up with `get_pgid(getpid(p))` and handed, together with `force`,
to `kill_process_group`. If no group ID can be determined, a warning is logged and only
`p` is signalled directly: `SIGKILL` when `force` is true, `SIGTERM` otherwise.
"""
function kill_pg(p::Base.Process, force = false)
    pgid = get_pgid(getpid(p))
    if isnothing(pgid)
        @warn "No process group found for $p"
        # Fallback: signal just the process itself, still honouring `force`.
        return kill(p, force ? Base.SIGKILL : Base.SIGTERM)
    end
    return kill_process_group(pgid, force)
end
136+
"""
    kill_process_group(pid::Int32, force)

Send a signal to every process in the process group of `pid` using the external
`kill` command.

The group is resolved with `get_pgid(pid)`. `KILL` is sent when `force` is true,
`TERM` otherwise. If the group cannot be resolved, a warning is logged, nothing is
signalled and `nothing` is returned. Otherwise the result of `run` is returned; `run`
throws if the `kill` command exits unsuccessfully.
"""
function kill_process_group(pid::Int32, force)
    pgid = get_pgid(pid)
    if isnothing(pgid)
        # Without this guard the command would become `kill -TERM -- -nothing`.
        @warn "No process group found for pid $pid; nothing was signalled"
        return nothing
    end
    signal = force ? "KILL" : "TERM"
    # `--` ends option parsing so the negative ID is read as a process-group target.
    return run(`kill -$signal -- -$pgid`)
end
0 commit comments