@@ -16,7 +16,8 @@ use std::collections::HashMap;
1616use std:: collections:: HashSet ;
1717use std:: sync:: Arc ;
1818
19- use databend_common_base:: runtime:: execute_futures_in_parallel;
19+ use databend_common_base:: base:: tokio:: sync:: Semaphore ;
20+ use databend_common_base:: runtime:: Runtime ;
2021use databend_common_catalog:: plan:: block_idx_in_segment;
2122use databend_common_catalog:: plan:: split_prefix;
2223use databend_common_catalog:: plan:: split_row_id;
@@ -25,13 +26,15 @@ use databend_common_catalog::plan::Projection;
2526use databend_common_catalog:: table:: Table ;
2627use databend_common_exception:: ErrorCode ;
2728use databend_common_exception:: Result ;
29+ use databend_common_expression:: BlockRowIndex ;
2830use databend_common_expression:: DataBlock ;
2931use databend_common_expression:: TableSchemaRef ;
3032use databend_common_storage:: ColumnNodes ;
3133use databend_storages_common_cache:: LoadParams ;
3234use databend_storages_common_io:: ReadSettings ;
3335use databend_storages_common_table_meta:: meta:: BlockMeta ;
3436use databend_storages_common_table_meta:: meta:: TableSnapshot ;
37+ use futures_util:: future;
3538use itertools:: Itertools ;
3639
3740use super :: fuse_rows_fetcher:: RowsFetcher ;
@@ -54,8 +57,8 @@ pub(super) struct ParquetRowsFetcher<const BLOCKING_IO: bool> {
5457 part_map : HashMap < u64 , PartInfoPtr > ,
5558 segment_blocks_cache : HashMap < u64 , Vec < Arc < BlockMeta > > > ,
5659
57- // To control the parallelism of fetching blocks.
58- max_threads : usize ,
60+ semaphore : Arc < Semaphore > ,
61+ runtime : Arc < Runtime > ,
5962}
6063
6164#[ async_trait:: async_trait]
@@ -68,6 +71,7 @@ impl<const BLOCKING_IO: bool> RowsFetcher for ParquetRowsFetcher<BLOCKING_IO> {
6871
// Empties the fetcher's cached state: `part_map` and, with the line added by
// this change (marked `+`), `segment_blocks_cache` as well.
// NOTE(review): before this change only `part_map` was cleared here, so
// `segment_blocks_cache` was not emptied by this method.
6972 fn clear_cache ( & mut self ) {
7073 self . part_map . clear ( ) ;
74+ self . segment_blocks_cache . clear ( ) ;
7175 }
7276
7377 #[ async_backtrace:: framed]
@@ -77,72 +81,57 @@ impl<const BLOCKING_IO: bool> RowsFetcher for ParquetRowsFetcher<BLOCKING_IO> {
7781 let num_rows = row_ids. len ( ) ;
7882 let mut part_set = HashSet :: new ( ) ;
7983 let mut row_set = Vec :: with_capacity ( num_rows) ;
84+ let mut block_row_indices = HashMap :: new ( ) ;
8085 for row_id in row_ids {
8186 let ( prefix, idx) = split_row_id ( * row_id) ;
8287 part_set. insert ( prefix) ;
8388 row_set. push ( ( prefix, idx) ) ;
89+ block_row_indices
90+ . entry ( prefix)
91+ . or_insert ( Vec :: new ( ) )
92+ . push ( ( 0u32 , idx as u32 , 1usize ) ) ;
8493 }
8594
8695 // Read blocks in `prefix` order.
8796 let part_set = part_set. into_iter ( ) . sorted ( ) . collect :: < Vec < _ > > ( ) ;
88- let idx_map = part_set
97+ let mut idx_map = part_set
8998 . iter ( )
9099 . enumerate ( )
91- . map ( |( i, p) | ( * p, i ) )
100+ . map ( |( i, p) | ( * p, ( i , 0 ) ) )
92101 . collect :: < HashMap < _ , _ > > ( ) ;
93- // parts_per_thread = num_parts / max_threads
94- // remain = num_parts % max_threads
95- // task distribution:
96- // Part number of each task | Task number
97- // ------------------------------------------------------
98- // parts_per_thread + 1 | remain
99- // parts_per_thread | max_threads - remain
100- let num_parts = part_set. len ( ) ;
101- let mut tasks = Vec :: with_capacity ( self . max_threads ) ;
102- // Fetch blocks in parallel.
103- let part_size = num_parts / self . max_threads ;
104- let remainder = num_parts % self . max_threads ;
105- let mut begin = 0 ;
106- for i in 0 ..self . max_threads {
107- let end = if i < remainder {
108- begin + part_size + 1
109- } else {
110- begin + part_size
111- } ;
112- if begin == end {
113- break ;
114- }
115- let parts = part_set[ begin..end]
116- . iter ( )
117- . map ( |idx| self . part_map [ idx] . clone ( ) )
118- . collect :: < Vec < _ > > ( ) ;
119- tasks. push ( Self :: fetch_blocks (
102+
103+ let mut tasks = Vec :: with_capacity ( part_set. len ( ) ) ;
104+ for part in & part_set {
105+ tasks. push ( Self :: fetch_block (
120106 self . reader . clone ( ) ,
121- parts ,
107+ self . part_map [ part ] . clone ( ) ,
122108 self . settings ,
109+ block_row_indices[ part] . clone ( ) ,
123110 ) ) ;
124- begin = end;
125111 }
126112
127- let num_task = tasks. len ( ) ;
128- let blocks = execute_futures_in_parallel (
129- tasks,
130- num_task,
131- num_task * 2 ,
132- "parqeut rows fetch" . to_string ( ) ,
133- )
134- . await ?
135- . into_iter ( )
136- . collect :: < Result < Vec < _ > > > ( ) ?
137- . into_iter ( )
138- . flatten ( )
139- . collect :: < Vec < _ > > ( ) ;
113+ let tasks = tasks. into_iter ( ) . map ( |v| {
114+ |permit| async {
115+ let r = v. await ;
116+ drop ( permit) ;
117+ r
118+ }
119+ } ) ;
120+ let join_handlers = self
121+ . runtime
122+ . try_spawn_batch_with_owned_semaphore ( self . semaphore . clone ( ) , tasks)
123+ . await ?;
124+
125+ let joint = future:: try_join_all ( join_handlers) . await ?;
126+ let blocks = joint. into_iter ( ) . collect :: < Result < Vec < _ > > > ( ) ?;
140127 // Take result rows from blocks.
141128 let indices = row_set
142129 . iter ( )
143- . map ( |( prefix, row_idx) | {
144- let block_idx = idx_map[ prefix] ;
145- ( block_idx as u32 , * row_idx as u32 , 1_usize )
130+ . map ( |( prefix, _) | {
131+ let ( block_idx, row_idx_in_block) = idx_map. get_mut ( prefix) . unwrap ( ) ;
132+ let row_idx = * row_idx_in_block;
133+ * row_idx_in_block += 1 ;
134+ ( * block_idx as u32 , row_idx as u32 , 1_usize )
146135 } )
147136 . collect :: < Vec < _ > > ( ) ;
148137
@@ -169,7 +158,8 @@ impl<const BLOCKING_IO: bool> ParquetRowsFetcher<BLOCKING_IO> {
169158 projection : Projection ,
170159 reader : Arc < BlockReader > ,
171160 settings : ReadSettings ,
172- max_threads : usize ,
161+ semaphore : Arc < Semaphore > ,
162+ runtime : Arc < Runtime > ,
173163 ) -> Self {
174164 let schema = table. schema ( ) ;
175165 let segment_reader =
@@ -184,7 +174,8 @@ impl<const BLOCKING_IO: bool> ParquetRowsFetcher<BLOCKING_IO> {
184174 settings,
185175 part_map : HashMap :: new ( ) ,
186176 segment_blocks_cache : HashMap :: new ( ) ,
187- max_threads,
177+ semaphore,
178+ runtime,
188179 }
189180 }
190181
@@ -238,38 +229,31 @@ impl<const BLOCKING_IO: bool> ParquetRowsFetcher<BLOCKING_IO> {
238229 }
239230
// Diff view: lines marked `-` are the removed `fetch_blocks` (many parts in,
// `Vec<DataBlock>` out); lines marked `+` are its replacement `fetch_block`.
//
// `fetch_block` reads the column data of a single `part`, builds one block
// from it with `Self::build_block`, and returns the result of
// `DataBlock::take_blocks` applied to that block and `block_row_indices`.
//
// Parameters (new version):
//   reader            - block reader used for both the sync and async read paths.
//   part              - the single part to read.
//   settings          - read settings forwarded to the reader.
//   block_row_indices - indices handed to `take_blocks`; the visible caller
//                       builds every entry as `(0, idx, 1)`, i.e. always
//                       pointing at block 0, which matches the one-element
//                       slice `&[block]` passed below.
//
// Errors: any failure from the read call, `FuseBlockPartInfo::from_part`, or
// `build_block` is propagated with `?`.
//
// When `BLOCKING_IO` is true the synchronous reader call is used; otherwise
// the part is converted with `FuseBlockPartInfo::from_part` and read through
// the awaited `read_columns_data_by_merge_io`.
// NOTE(review): presumably `take_blocks` gathers exactly the listed rows so
// the returned block has `block_row_indices.len()` rows in list order - its
// definition is not visible here, confirm.
240231 #[ async_backtrace:: framed]
241- async fn fetch_blocks (
232+ async fn fetch_block (
242233 reader : Arc < BlockReader > ,
243- parts : Vec < PartInfoPtr > ,
234+ part : PartInfoPtr ,
244235 settings : ReadSettings ,
245- ) -> Result < Vec < DataBlock > > {
246- let mut chunks = Vec :: with_capacity ( parts. len ( ) ) ;
247- if BLOCKING_IO {
248- for part in parts. iter ( ) {
249- let chunk = reader. sync_read_columns_data_by_merge_io ( & settings, part, & None ) ?;
250- chunks. push ( chunk) ;
251- }
236+ block_row_indices : Vec < BlockRowIndex > ,
237+ ) -> Result < DataBlock > {
238+ let chunk = if BLOCKING_IO {
239+ reader. sync_read_columns_data_by_merge_io ( & settings, & part, & None ) ?
252240 } else {
253- for part in parts. iter ( ) {
254- let part = FuseBlockPartInfo :: from_part ( part) ?;
255- let chunk = reader
256- . read_columns_data_by_merge_io (
257- & settings,
258- & part. location ,
259- & part. columns_meta ,
260- & None ,
261- )
262- . await ?;
263- chunks. push ( chunk) ;
264- }
265- }
266- let fetched_blocks = chunks
267- . into_iter ( )
268- . zip ( parts. iter ( ) )
269- . map ( |( chunk, part) | Self :: build_block ( & reader, part, chunk) )
270- . collect :: < Result < Vec < _ > > > ( ) ?;
271-
272- Ok ( fetched_blocks)
241+ let fuse_part = FuseBlockPartInfo :: from_part ( & part) ?;
242+ reader
243+ . read_columns_data_by_merge_io (
244+ & settings,
245+ & fuse_part. location ,
246+ & fuse_part. columns_meta ,
247+ & None ,
248+ )
249+ . await ?
250+ } ;
251+ let block = Self :: build_block ( & reader, & part, chunk) ?;
252+ Ok ( DataBlock :: take_blocks (
253+ & [ block] ,
254+ & block_row_indices,
255+ block_row_indices. len ( ) ,
256+ ) )
273257 }
274258
275259 fn build_block (
0 commit comments