@@ -550,6 +550,7 @@ class BlockManager {
550550 * @return A vector of blocks occupied by this sequence for the layer given by layer_idx.
551551 */
552552 const std::vector<KVCacheBlock::Ptr>& get_block_table (uint64_t seq_id, size_t layer_idx) {
553+ std::lock_guard<std::mutex> lock (m_cached_blocks_map_mutex);
553554 OPENVINO_ASSERT (m_block_table.count (seq_id) == 1 );
554555 return m_block_table[seq_id][layer_idx];
555556 }
@@ -570,6 +571,7 @@ class BlockManager {
570571 * @return Number of blocks freed in each sequence in the group.
571572 */
572573 const size_t free_group_partially (SequenceGroup::Ptr sequence_group, size_t num_required_blocks) {
574+ std::lock_guard<std::mutex> lock (m_cached_blocks_map_mutex);
573575 size_t blocks_num = std::ceil (num_required_blocks / sequence_group->get_not_finished_sequences ().size ());
574576 auto not_finished_sequences = sequence_group->get_not_finished_sequences ();
575577 for (size_t idx = 0 ; idx < not_finished_sequences.size (); ++idx) {
@@ -613,6 +615,7 @@ class BlockManager {
613615 }
614616
615617 const size_t free_partially_beam_search_group (SequenceGroup::Ptr sequence_group, size_t num_required_blocks) {
618+ std::lock_guard<std::mutex> lock (m_cached_blocks_map_mutex);
616619 size_t physical_blocks_released = 0 ;
617620 size_t logical_blocks_released = 0 ;
618621 while (num_required_blocks > physical_blocks_released) {
@@ -632,6 +635,7 @@ class BlockManager {
632635 * @return The number of distinct physical blocks occupied by this sequence group.
633636 */
634637 const size_t get_number_of_blocks_occupied_by_sequence (SequenceGroup::Ptr sequence_group) {
638+ std::lock_guard<std::mutex> lock (m_cached_blocks_map_mutex);
635639 auto running_sequences = sequence_group->get_not_finished_sequences ();
636640 std::set<size_t > indices;
637641 for (size_t idx = 0 ; idx < running_sequences.size (); ++idx) {
@@ -652,6 +656,7 @@ class BlockManager {
652656 * @return Whether or not this BlockManager is managing this sequence group.
653657 */
654658 const bool has_block_table (uint64_t seq_id) {
659+ std::lock_guard<std::mutex> lock (m_cached_blocks_map_mutex);
655660 return m_block_table.count (seq_id) > 0 ;
656661 }
657662
@@ -766,6 +771,7 @@ class BlockManager {
766771 * other sequences tracked by this BlockManager.
767772 */
768773 void fork_sequence (uint64_t parent_id, uint64_t child_id) {
774+ std::lock_guard<std::mutex> lock (m_cached_blocks_map_mutex);
769775 OPENVINO_ASSERT (m_block_table.count (child_id) == 0 );
770776 m_block_table[child_id].resize (m_num_layers);
771777 for (size_t layer_idx = 0 ; layer_idx < m_num_layers; layer_idx++) {
@@ -782,6 +788,7 @@ class BlockManager {
782788 * @param seq_id Identifier of the sequence to free.
783789 */
784790 void free_sequence (size_t seq_id) {
791+ std::lock_guard<std::mutex> lock (m_cached_blocks_map_mutex);
785792 OPENVINO_ASSERT (m_block_table.find (seq_id) != m_block_table.end (), " sequence with id " , seq_id,
786793 " not found in BlockManager, but requested to free" );
787794 auto & block_table = m_block_table[seq_id];
@@ -846,6 +853,7 @@ class BlockManager {
846853 * @param logical_block_index_sets_to_free Sets (one for each layer) of logical block indices to be freed from this sequence.
847854 */
848855 void free_blocks_from_sequence (size_t seq_id, const std::vector<std::set<size_t >>& logical_block_index_sets_to_free) {
856+ std::lock_guard<std::mutex> lock (m_cached_blocks_map_mutex);
849857 std::vector<std::vector<size_t >> logical_block_indices_to_free (logical_block_index_sets_to_free.size ());
850858 for (size_t i = 0 ; i < logical_block_index_sets_to_free.size (); i++) {
851859 const auto & index_set = logical_block_index_sets_to_free[i];
@@ -916,6 +924,7 @@ class BlockManager {
916924 * allocated ones.
917925 */
918926 size_t required_blocks_count (SequenceGroup::CPtr seq_group) {
927+ std::lock_guard<std::mutex> lock (m_cached_blocks_map_mutex);
919928 std::vector<Sequence::CPtr> running_sequences = seq_group->get_running_sequences ();
920929 size_t blocks_count = 0 ; // total number of needed blocks for sequence group
921930 std::set<size_t > last_block_ids; // unique last block indices
@@ -973,6 +982,7 @@ class BlockManager {
973982 * @param seq_group Pointer to a sequence group.
974983 */
975984 void free_empty_physical_blocks (SequenceGroup::Ptr seq_group) {
985+ std::lock_guard<std::mutex> lock (m_cached_blocks_map_mutex);
976986 size_t num_logical_blocks = seq_group->get_num_logical_blocks ();
977987 if (num_logical_blocks == 0 ) {
978988 return ;
@@ -997,6 +1007,7 @@ class BlockManager {
9971007 * indices into which the source block contents should be copied into separately.
9981008 */
9991009 std::map<size_t , std::list<size_t >> append_slots (SequenceGroup::Ptr seq_group) {
1010+ std::lock_guard<std::mutex> lock (m_cached_blocks_map_mutex);
10001011 // Will always allocate the identical number of new blocks (if any) to each of the "layers" to keep the
10011012 // number of blocks occupied by each "layer" identical at all times.
10021013 size_t num_logical_blocks = seq_group->get_num_logical_blocks ();
0 commit comments