10 changes: 5 additions & 5 deletions src/common/util/include/openvino/util/mmap_object.hpp
@@ -12,6 +12,7 @@
 #include <fstream>
 #include <memory>
 #include <string>
+#include <variant>
 
 namespace ov {
 
@@ -28,14 +29,13 @@ class MappedMemory {
 };
 
 /**
- * @brief Returns mapped memory for a file from provided path.
- * Instead of reading files, we can map the memory via mmap for Linux
- * in order to avoid time-consuming reading and reduce memory consumption.
+ * @brief Returns mapped memory for a file using a path or a file descriptor.
+ * Accepts either a std::string (file path) or an int (file descriptor).
  *
- * @param path Path to a file which memory will be mmaped.
+ * @param path_or_fd std::variant containing either std::string (path) or int (fd).
  * @return MappedMemory shared ptr object which keeps the mmapped memory and controls its lifetime.
  */
-std::shared_ptr<ov::MappedMemory> load_mmap_object(const std::string& path);
+std::shared_ptr<ov::MappedMemory> load_mmap_object(const std::variant<std::string, int>& path_or_fd);
 
 #ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
 
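Reviewer note: a minimal usage sketch of the new signature (the fd branch is implemented on Linux only; the Windows build throws, see win_mmap_object.cpp below). The file name is hypothetical and error handling is elided; the mapping appears to adopt the descriptor via HandleHolder, so the caller should not close it afterwards.

#include <fcntl.h>  // open, O_RDONLY (POSIX)

#include <memory>
#include <string>

#include "openvino/util/mmap_object.hpp"

int main() {
    // Path-based mapping: the variant holds a std::string, as before.
    std::shared_ptr<ov::MappedMemory> by_path = ov::load_mmap_object(std::string("weights.bin"));

    // Descriptor-based mapping (Linux): the variant holds an int.
    int fd = open("weights.bin", O_RDONLY);
    std::shared_ptr<ov::MappedMemory> by_fd = ov::load_mmap_object(fd);  // mapping adopts fd

    // Both overload paths expose the mapped region through data()/size().
    return by_path->size() == by_fd->size() ? 0 : 1;
}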
33 changes: 23 additions & 10 deletions src/common/util/src/os/lin/lin_mmap_object.cpp
@@ -64,22 +64,28 @@ class MapHolder : public MappedMemory {
     MapHolder() = default;
 
     void set(const std::string& path) {
-        int prot = PROT_READ;
         int mode = O_RDONLY;
-        struct stat sb = {};
-        m_handle = HandleHolder(open(path.c_str(), mode));
-        if (m_handle.get() == -1) {
+        int fd = open(path.c_str(), mode);
+        if (fd == -1) {
             throw std::runtime_error("Can not open file " + path +
                                      " for mapping. Ensure that file exists and has appropriate permissions");
         }
-        if (fstat(m_handle.get(), &sb) == -1) {
-            throw std::runtime_error("Can not get file size for " + path);
+        set_from_fd(fd);
+    }
+
+    void set_from_fd(const int fd) {
+        int prot = PROT_READ;
+        struct stat sb = {};
+        m_handle = HandleHolder(fd);
+        if (fstat(fd, &sb) == -1) {
+            throw std::runtime_error("Can not get file size for fd=" + std::to_string(fd));
         }
         m_size = sb.st_size;
         if (m_size > 0) {
-            m_data = mmap(nullptr, m_size, prot, MAP_PRIVATE, m_handle.get(), 0);
+            m_data = mmap(nullptr, m_size, prot, MAP_PRIVATE, fd, 0);
             if (m_data == MAP_FAILED) {
-                throw std::runtime_error("Can not create file mapping for " + path + ", err=" + std::strerror(errno));
+                throw std::runtime_error("Can not create file mapping for fd=" + std::to_string(fd) +
+                                         ", err=" + std::strerror(errno));
             }
         } else {
             m_data = MAP_FAILED;
@@ -101,9 +107,16 @@
     }
 };
 
-std::shared_ptr<ov::MappedMemory> load_mmap_object(const std::string& path) {
+std::shared_ptr<ov::MappedMemory> load_mmap_object(const std::variant<std::string, int>& path_or_fd) {
     auto holder = std::make_shared<MapHolder>();
-    holder->set(path);
+    std::visit([&holder](auto&& arg) {
+        using T = std::decay_t<decltype(arg)>;
+        if constexpr (std::is_same_v<T, int>) {
+            holder->set_from_fd(arg);  // fd
+        } else if constexpr (std::is_same_v<T, std::string>) {
+            holder->set(arg);  // path
+        }
+    }, path_or_fd);
     return holder;
 }
 
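One caller-facing consequence worth flagging: set_from_fd() hands the descriptor straight to HandleHolder, so the mapping owns it from that point on (assuming HandleHolder keeps its close-on-destruction behavior). A caller that needs to keep using its own fd can pass a duplicate, sketched below with a hypothetical helper name:

#include <unistd.h>  // dup

#include <memory>
#include <stdexcept>

#include "openvino/util/mmap_object.hpp"

// Hypothetical caller-side helper: map the file while keeping `fd` usable afterwards.
std::shared_ptr<ov::MappedMemory> map_keeping_fd(int fd) {
    int owned = dup(fd);  // give the mapping its own descriptor to adopt
    if (owned == -1) {
        throw std::runtime_error("dup failed");
    }
    return ov::load_mmap_object(owned);  // the caller's original fd stays open
}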
15 changes: 11 additions & 4 deletions src/common/util/src/os/win/win_mmap_object.cpp
@@ -128,10 +128,17 @@ class MapHolder : public ov::MappedMemory {
     HandleHolder m_mapping;
 };
 
-std::shared_ptr<ov::MappedMemory> load_mmap_object(const std::string& path) {
-    auto holder = std::make_shared<MapHolder>();
-    holder->set(path);
-    return holder;
+std::shared_ptr<ov::MappedMemory> load_mmap_object(const std::variant<std::string, int>& path_or_fd) {
+    return std::visit([](auto&& arg) -> std::shared_ptr<ov::MappedMemory> {
+        using T = std::decay_t<decltype(arg)>;
+        if constexpr (std::is_same_v<T, int>) {
+            OPENVINO_THROW("File descriptor-based memory mapping is not supported on Windows. Use path-based load_mmap_object instead.");
+        } else if constexpr (std::is_same_v<T, std::string>) {
+            auto holder = std::make_shared<MapHolder>();
+            holder->set(arg);
+            return holder;
+        }
+    }, path_or_fd);
 }
 
 #ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
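Since the Windows build rejects the fd alternative only at runtime, portable callers need a fallback. A probing sketch, assuming OPENVINO_THROW raises ov::Exception as it does elsewhere in the codebase:

#include <iostream>
#include <memory>
#include <string>

#include "openvino/core/except.hpp"
#include "openvino/util/mmap_object.hpp"

// Sketch: try fd-based mapping, fall back to the path on platforms without support.
std::shared_ptr<ov::MappedMemory> map_fd_or_path(int fd, const std::string& path) {
    try {
        return ov::load_mmap_object(fd);
    } catch (const ov::Exception& e) {
        std::cerr << "fd mapping unavailable (" << e.what() << "), using path\n";
        return ov::load_mmap_object(path);
    }
}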
35 changes: 34 additions & 1 deletion src/inference/include/openvino/runtime/properties.hpp
@@ -1337,12 +1337,45 @@ static constexpr Property<int32_t, PropertyMutability::RW> compilation_num_threads
  */
 static constexpr Property<std::vector<std::string>, PropertyMutability::RO> execution_devices{"EXECUTION_DEVICES"};
 
+/**
+ * @brief Structure to represent a weights path with an optional file accessor function
+ * @ingroup ov_runtime_cpp_prop_api
+ */
+struct WeightsPath {
+    WeightsPath() = default;
+
+    WeightsPath(const std::string& path_) : path{path_}, file_accessor{} {}
+
+    template <typename Func>
+    WeightsPath(const std::string& path_, Func&& file_accessor_)
+        : path{path_}, file_accessor{std::forward<Func>(file_accessor_)} {}
+
+    operator std::string() const {
+        return path;
+    }
+
+    std::string path;
+    std::function<Any(const std::string&)> file_accessor;
+};
+
+/** @cond INTERNAL */
+inline std::ostream& operator<<(std::ostream& os, const WeightsPath& weights_path_val) {
+    return os << weights_path_val.path;
+}
+
+inline std::istream& operator>>(std::istream& is, WeightsPath& weights_path_val) {
+    is >> weights_path_val.path;
+    return is;
+}
+/** @endcond */
+
 /**
  * @brief Path to the file with model's weights.
  *
  * @note This property is used for weightless caching. Only used when the ov::CacheMode property is set to "OPTIMIZE_SIZE".
  * @ingroup ov_runtime_cpp_prop_api
  */
-static constexpr Property<std::string, PropertyMutability::RW> weights_path{"WEIGHTS_PATH"};
+static constexpr Property<WeightsPath, PropertyMutability::RW> weights_path{"WEIGHTS_PATH"};
 
 /**
  * @brief The precision of key cache compression
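A usage sketch of the extended property, matching how the NPUW import path below consumes it: the accessor receives the stored path and returns an ov::Any that the plugin reads back via as<int>(), i.e. an open file descriptor (Linux-only; file names hypothetical, error handling elided). Note also that operator>> above restores only the whitespace-delimited path token; a file_accessor cannot be round-tripped through text, so it survives only while the property value is passed around as a live WeightsPath object.

#include <fcntl.h>  // open, O_RDONLY (POSIX)

#include <fstream>
#include <string>

#include "openvino/runtime/core.hpp"

int main() {
    ov::Core core;
    std::ifstream blob("model.blob", std::ios::binary);  // hypothetical cached blob

    // Path plus accessor: the plugin asks the accessor for a descriptor instead
    // of opening the path itself (useful for sandboxed callers that can hand
    // out fds but not readable paths).
    ov::WeightsPath wp("weights.bin", [](const std::string& p) -> ov::Any {
        return open(p.c_str(), O_RDONLY);  // consumed by the plugin via Any::as<int>()
    });

    auto compiled = core.import_model(blob, "NPU", {ov::weights_path(wp)});
    return 0;
}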
@@ -31,7 +31,7 @@ OV_CONFIG_RELEASE_OPTION(ov::hint, dynamic_quantization_group_size, 0, "Dynamic
 OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, dynamic_quantization_group_size_max, UINT64_MAX, "Maximum dynamic quantization group size. When group_size is set as a higher value than this number, dynamic quantization will be turned off")
 OV_CONFIG_RELEASE_OPTION(ov::hint, kv_cache_precision, ov::element::dynamic, "")
 OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_kernels_reuse, false, "")
-OV_CONFIG_RELEASE_OPTION(ov, weights_path, "", "Path to the model weights file used for weightless caching")
+OV_CONFIG_RELEASE_OPTION(ov, weights_path, ov::WeightsPath(), "Path to the model weights file used for weightless caching")
 OV_CONFIG_RELEASE_OPTION(ov::hint, activations_scale_factor, -1.0f, "Scalar floating point value that is used for runtime activation tensor scaling with fp16 inference precision")
 OV_CONFIG_RELEASE_OPTION(ov::internal, enable_lp_transformations, false, "Enable/Disable Low precision transformations set")
 OV_CONFIG_RELEASE_OPTION(ov::intel_gpu, config_file, "", "Path to custom layers config file")
21 changes: 15 additions & 6 deletions src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
@@ -1137,14 +1137,14 @@ std::shared_ptr<ov::npuw::CompiledModel> ov::npuw::CompiledModel::deserialize(
     read(stream, compiled->m_bf16_consts);
 
     // Initialize weights stream if weightless flow
-    std::string weights_path;
+    ov::WeightsPath weights_path_prop;
     std::shared_ptr<ov::Model> model_ptr;
     // Cache model's constants
     WeightsContext::ConstsCache consts_cache;
     if (is_weightless) {
         if (properties.find(ov::weights_path.name()) != properties.end()) {
-            weights_path = properties.at(ov::weights_path.name()).as<std::string>();
-            NPUW_ASSERT(!weights_path.empty() &&
+            weights_path_prop = properties.at(ov::weights_path.name()).as<ov::WeightsPath>();
+            NPUW_ASSERT(!weights_path_prop.path.empty() &&
                         "Empty weights_path. Please provide WEIGHTS_PATH or MODEL_PTR in the configuration.");
         } else if (properties.find(ov::hint::model.name()) != properties.end()) {
             model_ptr = std::const_pointer_cast<ov::Model>(
@@ -1180,8 +1180,16 @@
 
     ov::npuw::s11n::WeightsPtr weights = nullptr;
     if (is_weightless) {
-        if (!weights_path.empty()) {
-            auto mapped_memory = ov::load_mmap_object(weights_path);
+        if (!weights_path_prop.path.empty()) {
+            std::shared_ptr<ov::MappedMemory> mapped_memory;
+            // Use file_accessor if available to get fd, otherwise use path
+            if (weights_path_prop.file_accessor) {
+                auto result = weights_path_prop.file_accessor(weights_path_prop.path);
+                int fd = result.as<int>();
+                mapped_memory = ov::load_mmap_object(fd);
+            } else {
+                mapped_memory = ov::load_mmap_object(weights_path_prop.path);
+            }
             weights = std::make_shared<ov::npuw::s11n::Weights>(mapped_memory->data(),
                                                                 mapped_memory->size(),
                                                                 mapped_memory);
@@ -1191,7 +1199,8 @@
     // FIXME: prolong lifetime of ov::Model for import with MODEL_PTR.
     // Unclear why it's needed, but without saving consts_cache until bank evaluation,
     // the memory is freed somewhere.
-    compiled->m_import_weights_ctx = WeightsContext(weights, weights_path, consts_cache, compiled->m_bf16_consts);
+    compiled->m_import_weights_ctx =
+        WeightsContext(weights, weights_path_prop, consts_cache, compiled->m_bf16_consts);
 
     // Deserialize compiled submodels
     std::size_t subm_size = 0;
16 changes: 12 additions & 4 deletions src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp
@@ -61,10 +61,18 @@ ov::Tensor Const::eval() const {
     }
 
     // Weightless import case. Mmap CPU weight on demand to avoid allocating all weights at once.
-    if (!m_weights_path.empty()) {
+    if (!m_weights_path.path.empty()) {
         NPUW_ASSERT(!m_read_from_bin &&
                     "Trying to read weight from weights file, but the weight has been already deserialized!");
-        auto mapped_memory = ov::load_mmap_object(m_weights_path);
+        std::shared_ptr<ov::MappedMemory> mapped_memory;
+        // Use file_accessor if available to get fd, otherwise use path
+        if (m_weights_path.file_accessor) {
+            auto result = m_weights_path.file_accessor(m_weights_path.path);
+            int fd = result.as<int>();
+            mapped_memory = ov::load_mmap_object(fd);
+        } else {
+            mapped_memory = ov::load_mmap_object(m_weights_path.path);
+        }
         m_mmaped_weights =
             std::make_shared<ov::npuw::s11n::Weights>(mapped_memory->data(), mapped_memory->size(), mapped_memory);
         return ov::Tensor(m_cached_type, m_cached_shape, m_mmaped_weights->get_ptr(m_offset));
@@ -80,7 +88,7 @@ LazyTensor::Meta Const::eval_meta() const {
     }
 
     // Weightless import case
-    if (!m_weights_path.empty()) {
+    if (!m_weights_path.path.empty()) {
         return {m_cached_shape, m_cached_type};
     }
 
@@ -115,7 +123,7 @@ void Const::read_weight(const ov::npuw::s11n::WeightsContext& ctx) {
     // It doesn't introduce extra allocation, and it allows reading mmapped CPU weights
     // gradually, one by one, allocating them on device without loading all the weights first.
     // Thus the memory consumption during import is greatly reduced, at a slight cost in performance.
-    NPUW_ASSERT(!ctx.weights_path.empty());
+    NPUW_ASSERT(!ctx.weights_path.path.empty());
     // Just save weights_path for eval() to call the actual mmap.
     m_weights_path = ctx.weights_path;
 }
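The accessor-or-path block above now appears verbatim in both compiled_model.cpp and lazy_tensor.cpp; a small shared helper (hypothetical name and location) would keep the two call sites from drifting:

#include <memory>

#include "openvino/runtime/properties.hpp"
#include "openvino/util/mmap_object.hpp"

// Hypothetical shared helper: mmap via the accessor-provided fd when one is
// set, otherwise fall back to the stored path (same logic as both call sites).
inline std::shared_ptr<ov::MappedMemory> mmap_weights(const ov::WeightsPath& wp) {
    if (wp.file_accessor) {
        const int fd = wp.file_accessor(wp.path).as<int>();
        return ov::load_mmap_object(fd);
    }
    return ov::load_mmap_object(wp.path);
}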
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp
@@ -107,7 +107,7 @@ class Const {
     std::size_t m_offset = 0;
     std::size_t m_byte_size = 0;
     ov::Tensor m_read_from_bin;
-    std::string m_weights_path;
+    ov::WeightsPath m_weights_path;
     mutable ov::npuw::s11n::WeightsPtr m_mmaped_weights = nullptr;
     // FIXME: special case when a new Constant was added into the model,
     // then made into LazyTensor during folding. We need to keep a copy of it,
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/plugin/npuw/serialization.cpp
@@ -26,7 +26,7 @@ ov::npuw::s11n::WeightsContext::WeightsContext(bool _is_weightless,
 
 // NOTE: This constructor can and should only be used when importing blobs
 ov::npuw::s11n::WeightsContext::WeightsContext(const ov::npuw::s11n::WeightsPtr& _weights,
-                                               const std::string& _weights_path,
+                                               const ov::WeightsPath& _weights_path,
                                                const s11n::WeightsContext::ConstsCache& _consts_cache,
                                                const BF16Cache& _bf16_consts)
     : weights(_weights),
6 changes: 4 additions & 2 deletions src/plugins/intel_npu/src/plugin/npuw/serialization.hpp
@@ -17,6 +17,8 @@
 #include <unordered_set>
 #include <vector>
 
+#include "openvino/runtime/properties.hpp"
+
 namespace ov {
 namespace npuw {
 namespace s11n {
@@ -124,7 +126,7 @@ struct WeightsContext {
 
     // NOTE: This constructor can and should only be used when importing weightless blobs
     WeightsContext(const ov::npuw::s11n::WeightsPtr& _weights,
-                   const std::string& _weights_path,
+                   const ov::WeightsPath& _weights_path,
                    const ConstsCache& _consts_cache,
                    const BF16Cache& _bf16_consts);
 
@@ -138,7 +140,7 @@
     bool is_weightless = true;
     std::unordered_map<const void*, std::size_t> const_to_offset;
    ov::npuw::s11n::WeightsPtr weights = nullptr;
-    std::string weights_path;
+    ov::WeightsPath weights_path;
     ConstsCache consts_cache;
     BF16Cache bf16_consts;
 };