@@ -35,6 +35,8 @@ use common_expression::Evaluator;
3535use common_expression:: Expr ;
3636use common_expression:: FieldIndex ;
3737use common_expression:: RemoteExpr ;
38+ use common_expression:: TableDataType ;
39+ use common_expression:: TableField ;
3840use common_expression:: TableSchema ;
3941use common_expression:: Value ;
4042use common_functions:: scalars:: BUILTIN_FUNCTIONS ;
@@ -44,6 +46,8 @@ use common_sql::evaluator::BlockOperator;
4446use storages_common_table_meta:: meta:: Location ;
4547use storages_common_table_meta:: meta:: TableSnapshot ;
4648
49+ use super :: mutation:: MutationSerializeTransform ;
50+ use super :: mutation:: ParquetDeleteSource ;
4751use crate :: operations:: mutation:: MutationAction ;
4852use crate :: operations:: mutation:: MutationPartInfo ;
4953use crate :: operations:: mutation:: MutationSink ;
@@ -148,6 +152,107 @@ impl FuseTable {
148152 Ok ( ( ) )
149153 }
150154
155+ pub async fn do_delete2 (
156+ & self ,
157+ ctx : Arc < dyn TableContext > ,
158+ filter : Option < RemoteExpr < String > > ,
159+ col_indices : Vec < usize > ,
160+ pipeline : & mut Pipeline ,
161+ ) -> Result < ( ) > {
162+ let snapshot_opt = self . read_table_snapshot ( ) . await ?;
163+
164+ // check if table is empty
165+ let snapshot = if let Some ( val) = snapshot_opt {
166+ val
167+ } else {
168+ // no snapshot, no deletion
169+ return Ok ( ( ) ) ;
170+ } ;
171+
172+ if snapshot. summary . row_count == 0 {
173+ // empty snapshot, no deletion
174+ return Ok ( ( ) ) ;
175+ }
176+
177+ let scan_progress = ctx. get_scan_progress ( ) ;
178+ // check if unconditional deletion
179+ if filter. is_none ( ) {
180+ let progress_values = ProgressValues {
181+ rows : snapshot. summary . row_count as usize ,
182+ bytes : snapshot. summary . uncompressed_byte_size as usize ,
183+ } ;
184+ scan_progress. incr ( & progress_values) ;
185+ // deleting the whole table... just a truncate
186+ let purge = false ;
187+ return self . do_truncate ( ctx. clone ( ) , purge) . await ;
188+ }
189+
190+ let filter_expr = filter. unwrap ( ) ;
191+ if col_indices. is_empty ( ) {
192+ // here the situation: filter_expr is not null, but col_indices in empty, which
193+ // indicates the expr being evaluated is unrelated to the value of rows:
194+ // e.g.
195+ // `delete from t where 1 = 1`, `delete from t where now()`,
196+ // or `delete from t where RANDOM()::INT::BOOLEAN`
197+ // if the `filter_expr` is of "constant" nullary :
198+ // for the whole block, whether all of the rows should be kept or dropped,
199+ // we can just return from here, without accessing the block data
200+ if self . try_eval_const ( ctx. clone ( ) , & self . schema ( ) , & filter_expr) ? {
201+ let progress_values = ProgressValues {
202+ rows : snapshot. summary . row_count as usize ,
203+ bytes : snapshot. summary . uncompressed_byte_size as usize ,
204+ } ;
205+ scan_progress. incr ( & progress_values) ;
206+
207+ // deleting the whole table... just a truncate
208+ let purge = false ;
209+ return self . do_truncate ( ctx. clone ( ) , purge) . await ;
210+ }
211+ // do nothing.
212+ return Ok ( ( ) ) ;
213+ }
214+
215+ let projection = Projection :: Columns ( col_indices. clone ( ) ) ;
216+ self . mutation_block_pruning (
217+ ctx. clone ( ) ,
218+ vec ! [ filter_expr. clone( ) ] ,
219+ projection. clone ( ) ,
220+ & snapshot,
221+ )
222+ . await ?;
223+ let block_reader = self . create_block_reader ( projection, ctx. clone ( ) ) ?;
224+
225+ let mut schema = block_reader. schema ( ) . as_ref ( ) . to_owned ( ) ;
226+ schema. add_columns ( & [ TableField :: new ( "_row_exists" , TableDataType :: Boolean ) ] ) ?;
227+ let filter = Arc :: new (
228+ filter_expr
229+ . as_expr ( & BUILTIN_FUNCTIONS )
230+ . project_column_ref ( |name| schema. index_of ( name) . unwrap ( ) ) ,
231+ ) ;
232+
233+ let max_threads = ctx. get_settings ( ) . get_max_threads ( ) ? as usize ;
234+ // Add source pipe.
235+ pipeline. add_source (
236+ |output| {
237+ ParquetDeleteSource :: try_create (
238+ ctx. clone ( ) ,
239+ output,
240+ self ,
241+ filter. clone ( ) ,
242+ block_reader. clone ( ) ,
243+ )
244+ } ,
245+ max_threads,
246+ ) ?;
247+
248+ self . try_add_mutation_transform2 ( ctx. clone ( ) , snapshot. segments . clone ( ) , pipeline) ?;
249+
250+ pipeline. add_sink ( |input| {
251+ MutationSink :: try_create ( self , ctx. clone ( ) , snapshot. clone ( ) , input)
252+ } ) ?;
253+ Ok ( ( ) )
254+ }
255+
151256 pub fn try_eval_const (
152257 & self ,
153258 ctx : Arc < dyn TableContext > ,
@@ -335,6 +440,44 @@ impl FuseTable {
335440 }
336441 }
337442
443+ pub fn try_add_mutation_transform2 (
444+ & self ,
445+ ctx : Arc < dyn TableContext > ,
446+ base_segments : Vec < Location > ,
447+ pipeline : & mut Pipeline ,
448+ ) -> Result < ( ) > {
449+ if pipeline. is_empty ( ) {
450+ return Err ( ErrorCode :: Internal ( "The pipeline is empty." ) ) ;
451+ }
452+
453+ match pipeline. output_len ( ) {
454+ 0 => Err ( ErrorCode :: Internal ( "The output of the last pipe is 0." ) ) ,
455+ last_pipe_size => {
456+ let mut inputs_port = Vec :: with_capacity ( last_pipe_size) ;
457+ for _ in 0 ..last_pipe_size {
458+ inputs_port. push ( InputPort :: create ( ) ) ;
459+ }
460+ let output_port = OutputPort :: create ( ) ;
461+ pipeline. add_pipe ( Pipe :: create ( inputs_port. len ( ) , 1 , vec ! [ PipeItem :: create(
462+ MutationSerializeTransform :: try_create(
463+ ctx,
464+ self . schema( ) ,
465+ inputs_port. clone( ) ,
466+ output_port. clone( ) ,
467+ self . get_operator( ) ,
468+ self . meta_location_generator( ) . clone( ) ,
469+ base_segments,
470+ self . get_block_compact_thresholds( ) ,
471+ ) ?,
472+ inputs_port,
473+ vec![ output_port] ,
474+ ) ] ) ) ;
475+
476+ Ok ( ( ) )
477+ }
478+ }
479+ }
480+
338481 pub fn cluster_stats_gen ( & self , ctx : Arc < dyn TableContext > ) -> Result < ClusterStatsGenerator > {
339482 if self . cluster_key_meta . is_none ( ) {
340483 return Ok ( ClusterStatsGenerator :: default ( ) ) ;
0 commit comments