
Commit 5041b1d

sgonorovababushk authored and committed
Fix for deadlock in python callback
1 parent b7a5a80 commit 5041b1d

File tree

8 files changed: +84 −27 lines


samples/python/image_generation/image2image.py

Lines changed: 10 additions & 3 deletions
@@ -26,13 +26,20 @@ def main():
 
     image = read_image(args.image)
 
-    image_tensor = pipe.generate(args.prompt, image,
-        strength=0.8 # controls how initial image is noised after being converted to latent space. `1` means initial image is fully noised
+    def callback(step, num_steps, latent):
+        print(f"Step {step + 1}/{num_steps}")
+        return False
+
+    image_tensor = pipe.generate(
+        args.prompt,
+        image,
+        strength=0.8,
+        callback=callback
     )
 
     image = Image.fromarray(image_tensor.data[0])
     image.save("image.bmp")
 
 
-if '__main__' == __name__:
+if __name__ == '__main__':
     main()

samples/python/image_generation/inpainting.py

Lines changed: 6 additions & 2 deletions
@@ -28,11 +28,15 @@ def main():
     image = read_image(args.image)
     mask_image = read_image(args.mask)
 
-    image_tensor = pipe.generate(args.prompt, image, mask_image)
+    def callback(step, num_steps, latent):
+        print(f"Step {step + 1}/{num_steps}")
+        return False
+
+    image_tensor = pipe.generate(args.prompt, image, mask_image, callback=callback)
 
     image = Image.fromarray(image_tensor.data[0])
     image.save("image.bmp")
 
 
-if '__main__' == __name__:
+if __name__ == '__main__':
     main()

samples/python/image_generation/text2image.py

Lines changed: 7 additions & 2 deletions
@@ -17,16 +17,21 @@ def main():
     device = 'CPU' # GPU can be used as well
     pipe = openvino_genai.Text2ImagePipeline(args.model_dir, device)
 
+    def callback(step, num_steps, latent):
+        print(f"Step {step + 1}/{num_steps}")
+        return False
+
     image_tensor = pipe.generate(
         args.prompt,
         width=512,
         height=512,
         num_inference_steps=20,
-        num_images_per_prompt=1)
+        num_images_per_prompt=1,
+        callback=callback)
 
     image = Image.fromarray(image_tensor.data[0])
     image.save("image.bmp")
 
 
-if '__main__' == __name__:
+if __name__ == '__main__':
     main()

src/cpp/src/image_generation/threaded_callback.hpp

Lines changed: 15 additions & 9 deletions
@@ -4,6 +4,7 @@
 #pragma once
 
 #include <thread>
+#include <variant>
 
 #include "synchronized_queue.hpp"
 
@@ -33,7 +34,7 @@ class ThreadedCallbackWrapper {
             return CallbackStatus::STOP;
         }
 
-        m_squeue.push({step, num_steps, latent});
+        m_squeue.push(std::make_tuple(step, num_steps, latent));
 
         return CallbackStatus::RUNNING;
     }
@@ -44,7 +45,7 @@ class ThreadedCallbackWrapper {
         }
 
         m_status = CallbackStatus::STOP;
-        m_squeue.empty();
+        m_squeue.push(std::monostate());
 
         if (m_worker_thread && m_worker_thread->joinable()) {
            m_worker_thread->join();
@@ -58,18 +59,23 @@ class ThreadedCallbackWrapper {
 private:
     std::function<bool(size_t, size_t, ov::Tensor&)> m_callback = nullptr;
     std::shared_ptr<std::thread> m_worker_thread = nullptr;
-    SynchronizedQueue<std::tuple<size_t, size_t, ov::Tensor>> m_squeue;
+    SynchronizedQueue<std::variant<std::tuple<size_t, size_t, ov::Tensor>, std::monostate>> m_squeue;
 
     std::atomic<CallbackStatus> m_status = CallbackStatus::RUNNING;
 
     void _worker() {
         while (m_status == CallbackStatus::RUNNING) {
-            // wait for queue pull
-            auto [step, num_steps, latent] = m_squeue.pull();
-
-            if (m_callback(step, num_steps, latent)) {
-                m_status = CallbackStatus::STOP;
-                m_squeue.empty();
+            auto item = m_squeue.pull();
+
+            if (auto callback_data = std::get_if<std::tuple<size_t, size_t, ov::Tensor>>(&item)) {
+                auto& [step, num_steps, latent] = *callback_data;
+                const auto should_stop = m_callback(step, num_steps, latent);
+
+                if (should_stop) {
+                    m_status = CallbackStatus::STOP;
+                }
+            } else if (std::get_if<std::monostate>(&item)) {
+                break;
             }
         }
     }
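
Note: the deadlock this hunk removes came from the finalize path. Previously it set m_status to STOP and called m_squeue.empty(), which only reports whether the queue is empty and never wakes a worker blocked inside m_squeue.pull(), so the following join() could hang forever. The queue element is now a std::variant, and std::monostate is pushed as an explicit shutdown sentinel: the worker wakes, recognizes the sentinel, and leaves its loop. Below is a minimal, self-contained sketch of the same wake-up pattern; MiniQueue and the int payload are simplified stand-ins (the real SynchronizedQueue and the (step, num_steps, latent) tuple live in the files above), not the repo's actual code.

#include <condition_variable>
#include <iostream>
#include <mutex>
#include <queue>
#include <thread>
#include <variant>

// Stripped-down stand-in for SynchronizedQueue<T>: pull() blocks until push() is called.
template <typename T>
class MiniQueue {
public:
    void push(T value) {
        { std::lock_guard<std::mutex> lock(m_mutex); m_queue.push(std::move(value)); }
        m_cv.notify_one();
    }
    T pull() {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_cv.wait(lock, [this] { return !m_queue.empty(); });
        T value = std::move(m_queue.front());
        m_queue.pop();
        return value;
    }
private:
    std::queue<T> m_queue;
    std::mutex m_mutex;
    std::condition_variable m_cv;
};

int main() {
    // int stands in for the real callback payload; monostate is the shutdown sentinel.
    using Item = std::variant<int, std::monostate>;
    MiniQueue<Item> queue;

    std::thread worker([&queue] {
        for (;;) {
            Item item = queue.pull();                       // blocks until an item arrives
            if (std::get_if<std::monostate>(&item)) break;  // sentinel: exit the loop
            std::cout << "callback step " << std::get<int>(item) << "\n";
        }
    });

    queue.push(Item(1));
    queue.push(Item(2));
    queue.push(Item(std::monostate{}));  // wake the worker so join() below cannot deadlock
    worker.join();
}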

src/cpp/src/synchronized_queue.hpp

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ class SynchronizedQueue
 
     T back() {
         std::unique_lock<std::mutex> lock(m_mutex);
-        m_cv.wait(lock, [this]{return !m_queue.empty(); });
+        m_cv.wait(lock, [this]{return !m_queue.empty();});
         return m_queue.back();
     }
 

src/python/py_utils.cpp

Lines changed: 36 additions & 6 deletions
@@ -373,7 +373,24 @@ ov::Any py_object_to_any(const py::object& py_obj, std::string property_name) {
     } else if (py::isinstance<ov::genai::Generator>(py_obj)) {
         return py::cast<std::shared_ptr<ov::genai::Generator>>(py_obj);
     } else if (py::isinstance<py::function>(py_obj) && property_name == "callback") {
-        return py::cast<std::function<bool(size_t, size_t, ov::Tensor&)>>(py_obj);
+        auto py_callback = py::cast<py::function>(py_obj);
+        auto shared_callback = std::shared_ptr<py::function>(
+            new py::function(py_callback),
+            [](py::function* f) {
+                if (Py_IsInitialized()) {
+                    PyGILState_STATE gstate = PyGILState_Ensure();
+                    delete f;
+                    PyGILState_Release(gstate);
+                }
+            }
+        );
+
+        return std::function<bool(size_t, size_t, ov::Tensor&)>(
+            [shared_callback](size_t step, size_t num_steps, ov::Tensor& latent) -> bool {
+                py::gil_scoped_acquire acquire;
+                return (*shared_callback)(step, num_steps, latent).cast<bool>();
+            }
+        );
     } else if ((py::isinstance<py::function>(py_obj) || py::isinstance<ov::genai::StreamerBase>(py_obj) || py::isinstance<std::monostate>(py_obj)) && property_name == "streamer") {
         auto streamer = py::cast<ov::genai::pybind::utils::PyBindStreamerVariant>(py_obj);
         return ov::genai::streamer(pystreamer_to_streamer(streamer)).second;
@@ -437,12 +454,25 @@ ov::genai::StreamerVariant pystreamer_to_streamer(const PyBindStreamerVariant& p
 
     std::visit(overloaded {
         [&streamer](const std::function<std::optional<uint16_t>(py::str)>& py_callback){
-            // Wrap python streamer with manual utf-8 decoding. Do not rely
-            // on pybind automatic decoding since it raises exceptions on incomplete strings.
-            auto callback_wrapped = [py_callback](std::string subword) -> ov::genai::StreamingStatus {
+            auto shared_callback = std::shared_ptr<std::function<std::optional<uint16_t>(py::str)>>(
+                new std::function<std::optional<uint16_t>(py::str)>(py_callback),
+                [](std::function<std::optional<uint16_t>(py::str)>* f) {
+                    if (Py_IsInitialized()) {
+                        PyGILState_STATE gstate = PyGILState_Ensure();
+                        delete f;
+                        PyGILState_Release(gstate);
+                    }
+                }
+            );
+
+            auto callback_wrapped = [shared_callback](std::string subword) -> ov::genai::StreamingStatus {
                 py::gil_scoped_acquire acquire;
-                auto py_str = PyUnicode_DecodeUTF8(subword.data(), subword.length(), "replace");
-                std::optional<uint16_t> callback_output = py_callback(py::reinterpret_borrow<py::str>(py_str));
+                PyObject* py_str = PyUnicode_DecodeUTF8(subword.data(), subword.length(), "replace");
+                if (!py_str) {
+                    PyErr_Clear();
+                    return StreamingStatus::RUNNING;
+                }
+                std::optional<uint16_t> callback_output = (*shared_callback)(py::reinterpret_steal<py::str>(py_str));
                 if (callback_output.has_value()) {
                     if (*callback_output == (uint16_t)StreamingStatus::RUNNING)
                         return StreamingStatus::RUNNING;
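
Note: the other half of the fix. The Python callback used to be stored directly in a std::function, so the captured py::function could be copied or destroyed on a C++ thread that does not hold the GIL, or during interpreter shutdown, which can deadlock or crash. The diff keeps the py::function behind a std::shared_ptr whose custom deleter acquires the GIL (and skips deletion once the interpreter is finalized), and the invoking lambda takes py::gil_scoped_acquire before calling into Python. A minimal sketch of the same wrapping outside the repo's py_object_to_any(); the function name is illustrative and the ov::Tensor argument is omitted to keep it self-contained.

#include <functional>
#include <memory>
#include <pybind11/pybind11.h>

namespace py = pybind11;

// Wrap a Python callable so it can be stored, called and destroyed safely from C++ threads.
std::function<bool(size_t, size_t)> wrap_python_callback(const py::function& py_callback) {
    // Keep the py::function on the heap with a deleter that takes the GIL before
    // dropping the reference; if the interpreter is already gone, skip deletion.
    auto shared_callback = std::shared_ptr<py::function>(
        new py::function(py_callback),
        [](py::function* f) {
            if (Py_IsInitialized()) {
                PyGILState_STATE gstate = PyGILState_Ensure();
                delete f;
                PyGILState_Release(gstate);
            }
        }
    );

    return [shared_callback](size_t step, size_t num_steps) -> bool {
        py::gil_scoped_acquire acquire;  // calling into Python always requires the GIL
        return (*shared_callback)(step, num_steps).cast<bool>();
    };
}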

tests/python_tests/samples/test_inpainting.py

Lines changed: 6 additions & 2 deletions
@@ -11,19 +11,23 @@
 download_mask_image = download_test_content
 
 class TestInpainting:
+    PROMPT = "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting"
+    IMAGE_PATH = "images/image.png"
+    MASK_PATH = "mask_image.png"
+
     @pytest.mark.samples
     @pytest.mark.LCM_Dreamshaper_v7_int8_ov
     @pytest.mark.parametrize(
         "download_model, prompt",
         [
-            pytest.param("LCM_Dreamshaper_v7-int8-ov", "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting"),
+            pytest.param("LCM_Dreamshaper_v7-int8-ov", PROMPT),
         ],
         indirect=["download_model"],
     )
     @pytest.mark.parametrize(
         "download_test_content, download_mask_image",
         [
-            pytest.param("images/image.png", "mask_image.png"),
+            pytest.param(IMAGE_PATH, MASK_PATH),
         ],
         indirect=["download_test_content", "download_mask_image"],
     )

tests/python_tests/samples/test_text2image.py

Lines changed: 3 additions & 2 deletions
@@ -9,12 +9,14 @@
 from test_utils import run_sample
 
 class TestText2Image:
+    PROMPT = "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting"
+
     @pytest.mark.samples
     @pytest.mark.dreamlike_anime_1_0
     @pytest.mark.parametrize(
         "convert_model, sample_args",
         [
-            pytest.param("dreamlike-anime-1.0", "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting"),
+            pytest.param("dreamlike-anime-1.0", PROMPT),
         ],
         indirect=["convert_model"],
     )
@@ -29,7 +31,6 @@ def test_sample_text2image(self, convert_model, sample_args):
         cpp_command = [cpp_sample, convert_model, sample_args]
         run_sample(cpp_command)
 
-
     @pytest.mark.samples
     @pytest.mark.dreamlike_anime_1_0
     @pytest.mark.parametrize(

0 commit comments
