Skip to content

Commit 33c96fe

Browse files
authored
fix(query): optimize the cost estimation of some query plans (#18926)
* fix(query): optimize the cost estimation of some query plans * fix(query): optimize the cost estimation of some query plans * fix(query): optimize the cost estimation of some query plans * fix(query): optimize the cost estimation of some query plans * fix(query): optimize the cost estimation of some query plans * fix(query): optimize the cost estimation of some query plans * fix(query): optimize the cost estimation of some query plans * fix(query): optimize the cost estimation of some query plans * fix(query): optimize the cost estimation of some query plans * fix(query): optimize the cost estimation of some query plans
1 parent 76eadbc commit 33c96fe

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+1248
-1244
lines changed

src/query/service/src/pipelines/processors/transforms/hash_join/transform_hash_join_probe.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ use crate::pipelines::processors::transforms::hash_join::transform_hash_join_bui
2828
use crate::pipelines::processors::transforms::hash_join::HashJoinProbeState;
2929
use crate::pipelines::processors::transforms::hash_join::HashJoinSpiller;
3030
use crate::pipelines::processors::transforms::hash_join::ProbeState;
31+
use crate::pipelines::processors::transforms::HashJoinHashTable;
3132
use crate::pipelines::processors::Event;
3233
use crate::pipelines::processors::InputPort;
3334
use crate::pipelines::processors::OutputPort;
@@ -112,7 +113,6 @@ pub struct TransformHashJoinProbe {
112113
partition_id_to_restore: usize,
113114

114115
step: Step,
115-
step_logs: Vec<Step>,
116116
}
117117

118118
impl TransformHashJoinProbe {
@@ -176,7 +176,6 @@ impl TransformHashJoinProbe {
176176
spiller,
177177
partition_id_to_restore: 0,
178178
step: Step::Async(AsyncStep::WaitBuild),
179-
step_logs: vec![Step::Async(AsyncStep::WaitBuild)],
180179
}))
181180
}
182181

@@ -192,7 +191,6 @@ impl TransformHashJoinProbe {
192191
}
193192
};
194193
self.step = step.clone();
195-
self.step_logs.push(step);
196194
Ok(event)
197195
}
198196

@@ -526,6 +524,10 @@ impl TransformHashJoinProbe {
526524
.continue_build_watcher
527525
.send(true)
528526
.map_err(|_| ErrorCode::TokioError("continue_build_watcher channel is closed"))?;
527+
528+
self.join_probe_state.hash_join_state.reset();
529+
let hashtable = unsafe { &mut *self.join_probe_state.hash_join_state.hash_table.get() };
530+
*hashtable = HashJoinHashTable::Null;
529531
}
530532
Ok(())
531533
}

src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_avg_case_expression_optimized.txt

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,13 @@ Exchange(Merge)
1414
└── EvalScalar
1515
├── scalars: [a.a0d (#0) AS (#0), a.a0k (#7) AS (#7), a.a0m (#9) AS (#9), c.a5m (#144) AS (#144), if(eq(d.a1v (#154), '603020'), 1, 0) AS (#166), b.a2t (#70) AS (#171), c.a4m (#118) AS (#172), d.a5w (#156) AS (#173)]
1616
└── Join(Inner)
17-
├── build keys: [c.a0m (#149)]
18-
├── probe keys: [a.a0m (#9)]
19-
├── other filters: []
17+
├── build keys: [b.a0k (#48), b.a0n (#50)]
18+
├── probe keys: [a.a0k (#7), a.a0n (#10)]
19+
├── other filters: [lte(b.a2c (#52), a.a0d (#0)), gt(b.a2k (#61), a.a0d (#0))]
2020
├── Join(Inner)
21-
│ ├── build keys: [b.a0k (#48), b.a0n (#50)]
22-
│ ├── probe keys: [a.a0k (#7), a.a0n (#10)]
23-
│ ├── other filters: [lte(b.a2c (#52), a.a0d (#0)), gt(b.a2k (#61), a.a0d (#0))]
21+
│ ├── build keys: [c.a0m (#149)]
22+
│ ├── probe keys: [a.a0m (#9)]
23+
│ ├── other filters: []
2424
│ ├── Join(Inner)
2525
│ │ ├── build keys: [d.a5t (#151)]
2626
│ │ ├── probe keys: [a.a0l (#8)]
@@ -37,17 +37,17 @@ Exchange(Merge)
3737
│ │ ├── order by: []
3838
│ │ └── limit: NONE
3939
│ └── Exchange(Broadcast)
40-
│ └── Scan
41-
│ ├── table: default.a1z (#1)
42-
│ ├── filters: [eq(a1z.a2t (#70), '624100')]
43-
│ ├── order by: []
44-
│ └── limit: NONE
40+
│ └── EvalScalar
41+
│ ├── scalars: [CAST(c.a0m (#74) AS String NULL) AS (#149)]
42+
│ └── Scan
43+
│ ├── table: default.a2x (#2)
44+
│ ├── filters: [eq(substring(a2x.a4m (#118), 20, 1), '1')]
45+
│ ├── order by: []
46+
│ └── limit: NONE
4547
└── Exchange(Broadcast)
46-
└── EvalScalar
47-
├── scalars: [CAST(c.a0m (#74) AS String NULL) AS (#149)]
48-
└── Scan
49-
├── table: default.a2x (#2)
50-
├── filters: [eq(substring(a2x.a4m (#118), 20, 1), '1')]
51-
├── order by: []
52-
└── limit: NONE
48+
└── Scan
49+
├── table: default.a1z (#1)
50+
├── filters: [eq(a1z.a2t (#70), '624100')]
51+
├── order by: []
52+
└── limit: NONE
5353

src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_avg_case_expression_physical.txt

Lines changed: 44 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -4,76 +4,76 @@ Exchange
44
└── EvalScalar
55
├── output columns: [sell_mnt = 0 (#170)]
66
├── expressions: [t.sell_mnt (#169) = 0]
7-
├── estimated rows: 7119376617326129446912.00
7+
├── estimated rows: 2966406923885887436619776.00
88
└── EvalScalar
99
├── output columns: [sell_mnt (#169)]
1010
├── expressions: [sum(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#167) / CAST(if(CAST(count(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#168) = 0 AS Boolean NULL), 1, count(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#168)) AS UInt64 NULL) + 3]
11-
├── estimated rows: 7119376617326129446912.00
11+
├── estimated rows: 2966406923885887436619776.00
1212
└── AggregateFinal
1313
├── output columns: [sum(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#167), count(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#168), a.a0d (#0), a.a0k (#7), a.a0m (#9), c.a5m (#144)]
1414
├── group by: [a0d, a0k, a0m, a5m]
1515
├── aggregate functions: [sum(sum_arg_0), count()]
16-
├── estimated rows: 7119376617326129446912.00
16+
├── estimated rows: 2966406923885887436619776.00
1717
└── Exchange
1818
├── output columns: [sum(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#167), count(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#168), a.a0d (#0), a.a0k (#7), a.a0m (#9), c.a5m (#144)]
1919
├── exchange type: Hash(0, 1, 2, 3)
2020
└── AggregatePartial
2121
├── group by: [a0d, a0k, a0m, a5m]
2222
├── aggregate functions: [sum(sum_arg_0), count()]
23-
├── estimated rows: 7119376617326129446912.00
23+
├── estimated rows: 2966406923885887436619776.00
2424
└── EvalScalar
2525
├── output columns: [a.a0d (#0), a.a0k (#7), a.a0m (#9), c.a5m (#144), sum_arg_0 (#166)]
2626
├── expressions: [if(d.a1v (#154) = '603020', 1, 0)]
27-
├── estimated rows: 7119376617326129446912.00
27+
├── estimated rows: 8899220771657662846730240.00
2828
└── HashJoin
2929
├── output columns: [a.a0d (#0), a.a0k (#7), a.a0m (#9), d.a1v (#154), c.a5m (#144)]
3030
├── join type: INNER
31-
├── build keys: [c.a0m (#149)]
32-
├── probe keys: [a.a0m (#9)]
33-
├── keys is null equal: [false]
34-
├── filters: []
31+
├── build keys: [b.a0k (#48), b.a0n (#50)]
32+
├── probe keys: [a.a0k (#7), a.a0n (#10)]
33+
├── keys is null equal: [false, false]
34+
├── filters: [b.a2c (#52) <= a.a0d (#0), b.a2k (#61) > a.a0d (#0)]
3535
├── build join filters:
36-
│ └── filter id:3, build key:c.a0m (#149), probe key:a.a0m (#9), filter type:inlist,min_max
37-
├── estimated rows: 7119376617326129446912.00
36+
│ ├── filter id:2, build key:b.a0k (#48), probe key:a.a0k (#7), filter type:inlist,min_max
37+
│ └── filter id:3, build key:b.a0n (#50), probe key:a.a0n (#10), filter type:inlist,min_max
38+
├── estimated rows: 8899220771657662846730240.00
3839
├── Exchange(Build)
39-
│ ├── output columns: [c.a5m (#144), a0m (#149)]
40+
│ ├── output columns: [b.a0k (#48), b.a0n (#50), b.a2c (#52), b.a2k (#61)]
4041
│ ├── exchange type: Broadcast
41-
│ └── EvalScalar
42-
│ ├── output columns: [c.a5m (#144), a0m (#149)]
43-
│ ├── expressions: [CAST(c.a0m (#74) AS String NULL)]
44-
│ ├── estimated rows: 63773.60
45-
│ └── TableScan
46-
│ ├── table: default.default.a2x
47-
│ ├── output columns: [a0m (#74), a5m (#144)]
48-
│ ├── read rows: 0
49-
│ ├── read size: 0
50-
│ ├── partitions total: 0
51-
│ ├── partitions scanned: 0
52-
│ ├── push downs: [filters: [is_true(substr(a2x.a4m (#118), 20, 1) = '1')], limit: NONE]
53-
│ └── estimated rows: 63773.60
42+
│ └── TableScan
43+
│ ├── table: default.default.a1z
44+
│ ├── output columns: [a0k (#48), a0n (#50), a2c (#52), a2k (#61)]
45+
│ ├── read rows: 0
46+
│ ├── read size: 0
47+
│ ├── partitions total: 0
48+
│ ├── partitions scanned: 0
49+
│ ├── push downs: [filters: [is_true(a1z.a2t (#70) = '624100')], limit: NONE]
50+
│ └── estimated rows: 56867315.00
5451
└── HashJoin(Probe)
55-
├── output columns: [a.a0d (#0), a.a0k (#7), a.a0m (#9), d.a1v (#154)]
52+
├── output columns: [a.a0d (#0), a.a0k (#7), a.a0m (#9), a.a0n (#10), d.a1v (#154), c.a5m (#144)]
5653
├── join type: INNER
57-
├── build keys: [b.a0k (#48), b.a0n (#50)]
58-
├── probe keys: [a.a0k (#7), a.a0n (#10)]
59-
├── keys is null equal: [false, false]
60-
├── filters: [b.a2c (#52) <= a.a0d (#0), b.a2k (#61) > a.a0d (#0)]
54+
├── build keys: [c.a0m (#149)]
55+
├── probe keys: [a.a0m (#9)]
56+
├── keys is null equal: [false]
57+
├── filters: []
6158
├── build join filters:
62-
│ ├── filter id:1, build key:b.a0k (#48), probe key:a.a0k (#7), filter type:inlist,min_max
63-
│ └── filter id:2, build key:b.a0n (#50), probe key:a.a0n (#10), filter type:inlist,min_max
64-
├── estimated rows: 111635169056257280.00
59+
│ └── filter id:1, build key:c.a0m (#149), probe key:a.a0m (#9), filter type:inlist,min_max
60+
├── estimated rows: 156490960961629056.00
6561
├── Exchange(Build)
66-
│ ├── output columns: [b.a0k (#48), b.a0n (#50), b.a2c (#52), b.a2k (#61)]
62+
│ ├── output columns: [c.a5m (#144), a0m (#149)]
6763
│ ├── exchange type: Broadcast
68-
│ └── TableScan
69-
│ ├── table: default.default.a1z
70-
│ ├── output columns: [a0k (#48), a0n (#50), a2c (#52), a2k (#61)]
71-
│ ├── read rows: 0
72-
│ ├── read size: 0
73-
│ ├── partitions total: 0
74-
│ ├── partitions scanned: 0
75-
│ ├── push downs: [filters: [is_true(a1z.a2t (#70) = '624100')], limit: NONE]
76-
│ └── estimated rows: 45493.85
64+
│ └── EvalScalar
65+
│ ├── output columns: [c.a5m (#144), a0m (#149)]
66+
│ ├── expressions: [CAST(c.a0m (#74) AS String NULL)]
67+
│ ├── estimated rows: 63773.60
68+
│ └── TableScan
69+
│ ├── table: default.default.a2x
70+
│ ├── output columns: [a0m (#74), a5m (#144)]
71+
│ ├── read rows: 0
72+
│ ├── read size: 0
73+
│ ├── partitions total: 0
74+
│ ├── partitions scanned: 0
75+
│ ├── push downs: [filters: [is_true(substr(a2x.a4m (#118), 20, 1) = '1')], limit: NONE]
76+
│ └── estimated rows: 63773.60
7777
└── HashJoin(Probe)
7878
├── output columns: [a.a0d (#0), a.a0k (#7), a.a0m (#9), a.a0n (#10), d.a1v (#154)]
7979
├── join type: INNER
@@ -104,6 +104,6 @@ Exchange
104104
├── partitions total: 0
105105
├── partitions scanned: 0
106106
├── push downs: [filters: [and_filters(a0c.a0d (#0) >= '20240526', a0c.a0d (#0) <= '20250525')], limit: NONE]
107-
├── apply join filters: [#3, #1, #2, #0]
107+
├── apply join filters: [#2, #3, #1, #0]
108108
└── estimated rows: 3042216421.58
109109

0 commit comments

Comments
 (0)