@@ -19,7 +19,6 @@ use databend_common_base::base::tokio;
 use databend_common_base::base::tokio::sync::mpsc::channel;
 use databend_common_base::base::tokio::sync::mpsc::Receiver;
 use databend_common_exception::Result;
-use databend_common_expression::block_debug::pretty_format_blocks;
 use databend_common_expression::types::Int32Type;
 use databend_common_expression::DataBlock;
 use databend_common_expression::DataField;
@@ -34,130 +33,16 @@ use databend_common_pipeline_core::PipeItem;
 use databend_common_pipeline_core::Pipeline;
 use databend_common_pipeline_sinks::SyncSenderSink;
 use databend_common_pipeline_sources::BlocksSource;
-use databend_common_pipeline_transforms::processors::add_k_way_merge_sort;
 use databend_query::pipelines::executor::ExecutorSettings;
 use databend_query::pipelines::executor::QueryPipelineExecutor;
 use databend_query::sessions::QueryContext;
 use databend_query::test_kits::TestFixture;
-use itertools::Itertools;
-use parking_lot::Mutex;
 use rand::rngs::ThreadRng;
 use rand::Rng;
 
-#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
-async fn test_k_way_merge_sort() -> Result<()> {
-    let fixture = TestFixture::setup().await?;
-    let ctx = fixture.new_query_ctx().await?;
-
-    let worker = 3;
-    let block_size = 4;
-    let limit = None;
-    let (data, expected) = basic_test_data(None);
-    let (executor, mut rx) = create_pipeline(ctx, data, worker, block_size, limit)?;
-
-    executor.execute()?;
-
-    let mut got: Vec<DataBlock> = Vec::new();
-    while !rx.is_empty() {
-        got.push(rx.recv().await.unwrap()?);
-    }
-
-    check_result(got, expected);
-
-    Ok(())
-}
-
-#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
-async fn test_k_way_merge_sort_fuzz() -> Result<()> {
-    let mut rng = rand::thread_rng();
-    let fixture = TestFixture::setup().await?;
-
-    for _ in 0..10 {
-        let ctx = fixture.new_query_ctx().await?;
-        run_fuzz(ctx, &mut rng, false).await?;
-    }
-
-    for _ in 0..10 {
-        let ctx = fixture.new_query_ctx().await?;
-        run_fuzz(ctx, &mut rng, true).await?;
-    }
-    Ok(())
-}
-
-async fn run_fuzz(ctx: Arc<QueryContext>, rng: &mut ThreadRng, with_limit: bool) -> Result<()> {
-    let worker = rng.gen_range(1..=5);
-    let block_size = rng.gen_range(1..=20);
-    let (data, expected, limit) = random_test_data(rng, with_limit);
-
-    // println!("\nwith_limit {with_limit}");
-    // for (input, blocks) in data.iter().enumerate() {
-    //     println!("intput {input}");
-    //     for b in blocks {
-    //         println!("{:?}", b.columns()[0].value);
-    //     }
-    // }
-
-    let (executor, mut rx) = create_pipeline(ctx, data, worker, block_size, limit)?;
-    executor.execute()?;
-
-    let mut got: Vec<DataBlock> = Vec::new();
-    while !rx.is_empty() {
-        got.push(rx.recv().await.unwrap()?);
-    }
-
-    check_result(got, expected);
-
-    Ok(())
-}
-
-fn create_pipeline(
-    ctx: Arc<QueryContext>,
-    data: Vec<Vec<DataBlock>>,
-    worker: usize,
-    block_size: usize,
-    limit: Option<usize>,
-) -> Result<(Arc<QueryPipelineExecutor>, Receiver<Result<DataBlock>>)> {
-    let mut pipeline = Pipeline::create();
-
-    let data_type = data[0][0].get_by_offset(0).data_type.clone();
-    let source_pipe = create_source_pipe(ctx, data)?;
-    pipeline.add_pipe(source_pipe);
-
-    let schema = DataSchemaRefExt::create(vec![DataField::new("a", data_type)]);
-    let sort_desc = Arc::new(vec![SortColumnDescription {
-        offset: 0,
-        asc: true,
-        nulls_first: true,
-        is_nullable: false,
-    }]);
-    add_k_way_merge_sort(
-        &mut pipeline,
-        schema,
-        worker,
-        block_size,
-        limit,
-        sort_desc,
-        false,
-        true,
-    )?;
-
-    let (mut rx, sink_pipe) = create_sink_pipe(1)?;
-    let rx = rx.pop().unwrap();
-    pipeline.add_pipe(sink_pipe);
-    pipeline.set_max_threads(3);
-
-    let settings = ExecutorSettings {
-        query_id: Arc::new("".to_string()),
-        max_execute_time_in_seconds: Default::default(),
-        enable_queries_executor: false,
-        max_threads: 8,
-        executor_node_id: "".to_string(),
-    };
-    let executor = QueryPipelineExecutor::create(pipeline, settings)?;
-    Ok((executor, rx))
-}
-
 fn create_source_pipe(ctx: Arc<QueryContext>, data: Vec<Vec<DataBlock>>) -> Result<Pipe> {
+    use parking_lot::Mutex;
+
     let size = data.len();
     let mut items = Vec::with_capacity(size);
 
@@ -179,7 +64,7 @@ fn create_source_pipe(ctx: Arc<QueryContext>, data: Vec<Vec<DataBlock>>) -> Resu
 fn create_sink_pipe(size: usize) -> Result<(Vec<Receiver<Result<DataBlock>>>, Pipe)> {
     let mut rxs = Vec::with_capacity(size);
     let mut items = Vec::with_capacity(size);
-    for _index in 0..size {
+    for _ in 0..size {
         let input = InputPort::create();
         let (tx, rx) = channel(1000);
         rxs.push(rx);
@@ -193,21 +78,11 @@ fn create_sink_pipe(size: usize) -> Result<(Vec<Receiver<Result<DataBlock>>>, Pi
     Ok((rxs, Pipe::create(size, 0, items)))
 }
 
-/// Returns (input, expected)
-pub fn basic_test_data(limit: Option<usize>) -> (Vec<Vec<DataBlock>>, DataBlock) {
-    let data = vec![
-        vec![vec![1, 2, 3, 4], vec![4, 5, 6, 7]],
-        vec![vec![1, 1, 1, 1], vec![1, 10, 100, 2000]],
-        vec![vec![0, 2, 4, 5]],
-    ];
-
-    prepare_input_and_result(data, limit)
-}
-
-fn prepare_input_and_result(
+fn prepare_multi_input_and_result(
     data: Vec<Vec<Vec<i32>>>,
     limit: Option<usize>,
 ) -> (Vec<Vec<DataBlock>>, DataBlock) {
+    use itertools::Itertools;
     let input = data
         .clone()
         .into_iter()
@@ -229,7 +104,17 @@ fn prepare_input_and_result(
     (input, result)
 }
 
+fn prepare_single_input_and_result(
+    data: Vec<Vec<i32>>,
+    limit: Option<usize>,
+) -> (Vec<DataBlock>, DataBlock) {
+    let (mut input, expected) = prepare_multi_input_and_result(vec![data], limit);
+    (input.remove(0), expected)
+}
+
 fn check_result(result: Vec<DataBlock>, expected: DataBlock) {
+    use databend_common_expression::block_debug::pretty_format_blocks;
+
     if expected.is_empty() {
         if !result.is_empty() && !DataBlock::concat(&result).unwrap().is_empty() {
             panic!(
@@ -240,46 +125,15 @@ fn check_result(result: Vec<DataBlock>, expected: DataBlock) {
         return;
     }
 
-    let result_rows: usize = result.iter().map(|v| v.num_rows()).sum();
-    let result = pretty_format_blocks(&result).unwrap();
     let expected_rows = expected.num_rows();
     let expected = pretty_format_blocks(&[expected]).unwrap();
+    let result_rows: usize = result.iter().map(|v| v.num_rows()).sum();
+    let result = pretty_format_blocks(&result).unwrap();
     assert_eq!(
         expected, result,
-        "\nexpected (num_rows = {}):\n{}\nactual (num_rows = {}):\n{}",
-        expected_rows, expected, result_rows, result
+        "\nexpected (num_rows = {expected_rows}):\n{expected}\nactual (num_rows = {result_rows}):\n{result}",
     );
 }
 
-fn random_test_data(
-    rng: &mut ThreadRng,
-    with_limit: bool,
-) -> (Vec<Vec<DataBlock>>, DataBlock, Option<usize>) {
-    let random_batch_size = rng.gen_range(1..=10);
-    let random_num_streams = rng.gen_range(5..=10);
-
-    let random_data = (0..random_num_streams)
-        .map(|_| {
-            let random_num_blocks = rng.gen_range(1..=10);
-            let mut data = (0..random_batch_size * random_num_blocks)
-                .map(|_| rng.gen_range(0..=1000))
-                .collect::<Vec<_>>();
-            data.sort();
-            data.chunks(random_batch_size)
-                .map(|v| v.to_vec())
-                .collect::<Vec<_>>()
-        })
-        .collect::<Vec<_>>();
-
-    let num_rows = random_data
-        .iter()
-        .map(|v| v.iter().map(|v| v.len()).sum::<usize>())
-        .sum::<usize>();
-    let limit = if with_limit {
-        Some(rng.gen_range(0..=num_rows))
-    } else {
-        None
-    };
-    let (input, expected) = prepare_input_and_result(random_data, limit);
-    (input, expected, limit)
-}
+mod k_way;
+mod spill;