@@ -192,91 +192,6 @@ func (r *ConmonOCIRuntime) CreateContainer(ctr *Container, restoreOptions *Conta
192192 return r .createOCIContainer (ctr , restoreOptions )
193193}
194194
195- // UpdateContainerStatus retrieves the current status of the container from the
196- // runtime. It updates the container's state but does not save it.
197- // If useRuntime is false, we will not directly hit runc to see the container's
198- // status, but will instead only check for the existence of the conmon exit file
199- // and update state to stopped if it exists.
200- func (r * ConmonOCIRuntime ) UpdateContainerStatus (ctr * Container ) error {
201- runtimeDir , err := util .GetRootlessRuntimeDir ()
202- if err != nil {
203- return err
204- }
205-
206- // Store old state so we know if we were already stopped
207- oldState := ctr .state .State
208-
209- state := new (spec.State )
210-
211- cmd := exec .Command (r .path , "state" , ctr .ID ())
212- cmd .Env = append (cmd .Env , fmt .Sprintf ("XDG_RUNTIME_DIR=%s" , runtimeDir ))
213-
214- outPipe , err := cmd .StdoutPipe ()
215- if err != nil {
216- return fmt .Errorf ("getting stdout pipe: %w" , err )
217- }
218- errPipe , err := cmd .StderrPipe ()
219- if err != nil {
220- return fmt .Errorf ("getting stderr pipe: %w" , err )
221- }
222-
223- err = cmd .Start ()
224- if err != nil {
225- return fmt .Errorf ("error launching container runtime: %w" , err )
226- }
227- defer func () {
228- _ = cmd .Wait ()
229- }()
230-
231- stderr , err := io .ReadAll (errPipe )
232- if err != nil {
233- return fmt .Errorf ("reading stderr: %s: %w" , ctr .ID (), err )
234- }
235- if strings .Contains (string (stderr ), "does not exist" ) || strings .Contains (string (stderr ), "No such file" ) {
236- if err := ctr .removeConmonFiles (); err != nil {
237- logrus .Debugf ("unable to remove conmon files for container %s" , ctr .ID ())
238- }
239- ctr .state .ExitCode = - 1
240- ctr .state .FinishedTime = time .Now ()
241- ctr .state .State = define .ContainerStateExited
242- return ctr .runtime .state .AddContainerExitCode (ctr .ID (), ctr .state .ExitCode )
243- }
244- if err := errPipe .Close (); err != nil {
245- return err
246- }
247-
248- out , err := io .ReadAll (outPipe )
249- if err != nil {
250- return fmt .Errorf ("reading stdout: %s: %w" , ctr .ID (), err )
251- }
252- if err := json .NewDecoder (bytes .NewReader (out )).Decode (state ); err != nil {
253- return fmt .Errorf ("decoding container status for container %s: %w" , ctr .ID (), err )
254- }
255- ctr .state .PID = state .Pid
256-
257- switch state .Status {
258- case "created" :
259- ctr .state .State = define .ContainerStateCreated
260- case "paused" :
261- ctr .state .State = define .ContainerStatePaused
262- case "running" :
263- ctr .state .State = define .ContainerStateRunning
264- case "stopped" :
265- ctr .state .State = define .ContainerStateStopped
266- default :
267- return fmt .Errorf ("unrecognized status returned by runtime for container %s: %s: %w" ,
268- ctr .ID (), state .Status , define .ErrInternal )
269- }
270-
271- // Handle ContainerStateStopping - keep it unless the container
272- // transitioned to no longer running.
273- if oldState == define .ContainerStateStopping && (ctr .state .State == define .ContainerStatePaused || ctr .state .State == define .ContainerStateRunning ) {
274- ctr .state .State = define .ContainerStateStopping
275- }
276-
277- return nil
278- }
279-
280195// StartContainer starts the given container.
281196// Sets time the container was started, but does not save it.
282197func (r * ConmonOCIRuntime ) StartContainer (ctr * Container ) error {
@@ -358,6 +273,8 @@ func (r *ConmonOCIRuntime) KillContainer(ctr *Container, signal uint, all bool)
358273
359274// If captureStderr is requested, OCI runtime STDERR will be captured as a
360275// *bytes.buffer and returned; otherwise, it is set to os.Stderr.
276+ // IMPORTANT: Thus function is called from an unlocked container state in
277+ // the stop() code path so do not modify the state here.
361278func (r * ConmonOCIRuntime ) killContainer (ctr * Container , signal uint , all , captureStderr bool ) (* bytes.Buffer , error ) {
362279 logrus .Debugf ("Sending signal %d to container %s" , signal , ctr .ID ())
363280 runtimeDir , err := util .GetRootlessRuntimeDir ()
@@ -381,15 +298,15 @@ func (r *ConmonOCIRuntime) killContainer(ctr *Container, signal uint, all, captu
381298 stderr = stderrBuffer
382299 }
383300 if err := utils .ExecCmdWithStdStreams (os .Stdin , os .Stdout , stderr , env , r .path , args ... ); err != nil {
384- // Update container state - there's a chance we failed because
385- // the container exited in the meantime.
386- if err2 := r . UpdateContainerStatus (ctr ); err2 != nil {
387- logrus . Infof ( "Error updating status for container %s: %v" , ctr . ID (), err2 )
388- }
389- if ctr . ensureState ( define . ContainerStateStopped , define . ContainerStateExited ) {
390- return stderrBuffer , fmt . Errorf ( "%w: %s" , define .ErrCtrStateInvalid , ctr . state . State )
301+ rErr := err
302+ // quick check if ctr pid is still alive
303+ if err := unix . Kill (ctr . state . PID , 0 ); err == unix . ESRCH {
304+ // pid already dead so signal sending fails logically, set error to ErrCtrStateInvalid
305+ // This is needed for the ProxySignals() function which already ignores the error to not cause flakes
306+ // when the ctr process just exited.
307+ rErr = define .ErrCtrStateInvalid
391308 }
392- return stderrBuffer , fmt .Errorf ("sending signal to container %s: %w" , ctr .ID (), err )
309+ return stderrBuffer , fmt .Errorf ("sending signal to container %s: %w" , ctr .ID (), rErr )
393310 }
394311
395312 return stderrBuffer , nil
@@ -401,6 +318,8 @@ func (r *ConmonOCIRuntime) killContainer(ctr *Container, signal uint, all, captu
401318// immediately kill with SIGKILL.
402319// Does not set finished time for container, assumes you will run updateStatus
403320// after to pull the exit code.
321+ // IMPORTANT: Thus function is called from an unlocked container state in
322+ // the stop() code path so do not modify the state here.
404323func (r * ConmonOCIRuntime ) StopContainer (ctr * Container , timeout uint , all bool ) error {
405324 logrus .Debugf ("Stopping container %s (PID %d)" , ctr .ID (), ctr .state .PID )
406325
0 commit comments