Skip to content

Commit d0d7a31

Browse files
authored
fix(query): fix join subquery scalar rewrite (#17441)
* fix(query): fix join subquery scalar rewrite
* fix(query): fix join subquery scalar rewrite
* fix(query): fix join subquery scalar rewrite
* fix(query): fix join subquery scalar rewrite
* fix(query): fix join subquery scalar rewrite
* update
1 parent 17f4491 commit d0d7a31

File tree

14 files changed

+456
-362
lines changed

14 files changed

+456
-362
lines changed

benchmark/tpcds/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,5 @@ To run the TPC-DS Benchmark, first build `databend-sqllogictests` binary.
1717
Then, execute the following command in your shell:
1818

1919
```shell
20-
databend-sqllogictests --handlers mysql --database tpcds --run_dir tpcds --bench
20+
databend-sqllogictests --handlers mysql --database tpcds --run_dir tpcds --bench --run_file queries.test
2121
```

benchmark/tpcds/load_data.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ do
6060
echo "$t"
6161
fp="`pwd`/data/$t.csv"
6262
echo "copy into ${MYSQL_DATABASE}.$t from 'fs://${fp}' file_format = (type = CSV skip_header = 1 field_delimiter = '|' record_delimiter = '\n')" | $BENDSQL_CLIENT_CONNECT
63+
echo "analyze table ${MYSQL_DATABASE}.$t" | $BENDSQL_CLIENT_CONNECT
6364
done
6465

6566

benchmark/tpch/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ To run the TPC-H Benchmark, first build `databend-sqllogictests` binary.
2929
Then, execute the following command in your shell:
3030

3131
```shell
32-
databend-sqllogictests --handlers mysql --database tpch --run_dir tpch --bench
32+
databend-sqllogictests --handlers mysql --database tpch --run_dir tpch --bench --run_file queries.test
3333
```
3434

3535
## More

benchmark/tpch/load_data.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,4 +117,5 @@ do
117117
echo "$t"
118118
fp="`pwd`/data/$t.tbl"
119119
echo "copy into ${MYSQL_DATABASE}.$t from 'fs://${fp}' file_format = (type = CSV skip_header = 1 field_delimiter = '|' record_delimiter = '\n')" | $BENDSQL_CLIENT_CONNECT
120+
echo "analyze table ${MYSQL_DATABASE}.$t" | $BENDSQL_CLIENT_CONNECT
120121
done

src/common/storage/src/statistics.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,4 +182,21 @@ impl Datum {
182182
))),
183183
}
184184
}
185+
186+
pub fn can_compare(&self, other: &Self) -> bool {
187+
matches!(
188+
(self, other),
189+
(Datum::Bool(_), Datum::Bool(_))
190+
| (Datum::Int(_), Datum::Int(_))
191+
| (Datum::Int(_), Datum::UInt(_))
192+
| (Datum::Int(_), Datum::Float(_))
193+
| (Datum::UInt(_), Datum::UInt(_))
194+
| (Datum::UInt(_), Datum::Int(_))
195+
| (Datum::UInt(_), Datum::Float(_))
196+
| (Datum::Float(_), Datum::Float(_))
197+
| (Datum::Float(_), Datum::Int(_))
198+
| (Datum::Float(_), Datum::UInt(_))
199+
| (Datum::Bytes(_), Datum::Bytes(_))
200+
)
201+
}
185202
}

src/query/sql/src/planner/optimizer/decorrelate/decorrelate.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -271,12 +271,12 @@ impl SubqueryRewriter {
271271
let mut join_type = JoinType::LeftSingle;
272272
if subquery.contain_agg.unwrap() {
273273
let rel_expr = RelExpr::with_s_expr(&subquery.subquery);
274-
let has_precise_cardinality = rel_expr
274+
let card = rel_expr
275275
.derive_cardinality()?
276276
.statistics
277-
.precise_cardinality
278-
.is_some();
279-
if has_precise_cardinality {
277+
.precise_cardinality;
278+
279+
if card.is_some() {
280280
join_type = JoinType::Left;
281281
}
282282
}

src/query/sql/src/planner/optimizer/property/selectivity.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,10 @@ impl<'a> SelectivityEstimator<'a> {
442442
let lower_bound = bucket.lower_bound();
443443
let upper_bound = bucket.upper_bound();
444444

445+
if !const_datum.can_compare(lower_bound) {
446+
return Ok(DEFAULT_SELECTIVITY);
447+
}
448+
445449
let const_gte_upper_bound = matches!(
446450
const_datum.compare(upper_bound)?,
447451
Ordering::Greater | Ordering::Equal

src/query/sql/src/planner/semantic/type_check.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3169,10 +3169,10 @@ impl<'a> TypeChecker<'a> {
31693169
if let SetExpr::Select(select_stmt) = &subquery.body {
31703170
if typ == SubqueryType::Scalar {
31713171
let select = &select_stmt.select_list[0];
3172-
if let SelectTarget::AliasedExpr { expr, .. } = select {
3172+
if matches!(select, SelectTarget::AliasedExpr { .. }) {
31733173
// Check if contain aggregation function
31743174
#[derive(Visitor)]
3175-
#[visitor(ASTFunctionCall(enter))]
3175+
#[visitor(Expr(enter), ASTFunctionCall(enter))]
31763176
struct AggFuncVisitor {
31773177
contain_agg: bool,
31783178
}
@@ -3182,9 +3182,13 @@ impl<'a> TypeChecker<'a> {
31823182
|| AggregateFunctionFactory::instance()
31833183
.contains(func.name.to_string());
31843184
}
3185+
fn enter_expr(&mut self, expr: &Expr) {
3186+
self.contain_agg = self.contain_agg
3187+
|| matches!(expr, Expr::CountAll { window: None, .. });
3188+
}
31853189
}
31863190
let mut visitor = AggFuncVisitor { contain_agg: false };
3187-
expr.drive(&mut visitor);
3191+
select.drive(&mut visitor);
31883192
contain_agg = Some(visitor.contain_agg);
31893193
}
31903194
}

tests/sqllogictests/src/main.rs

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -278,8 +278,10 @@ async fn run_suits(args: SqlLogicTestArgs, client_type: ClientType) -> Result<()
278278
}
279279
}
280280

281-
// lazy load test datas
282-
lazy_prepare_data(&lazy_dirs)?;
281+
if !args.bench {
282+
// lazy load test datas
283+
lazy_prepare_data(&lazy_dirs)?;
284+
}
283285
// lazy run dictionaries containers
284286
let _dict_container = lazy_run_dictionary_containers(&lazy_dirs).await?;
285287

@@ -314,7 +316,9 @@ async fn run_suits(args: SqlLogicTestArgs, client_type: ClientType) -> Result<()
314316
let mut tasks = Vec::with_capacity(files.len());
315317
for file in files {
316318
let client_type = client_type.clone();
317-
tasks.push(async move { run_file_async(&client_type, file.unwrap().path()).await });
319+
tasks.push(async move {
320+
run_file_async(&client_type, args.bench, file.unwrap().path()).await
321+
});
318322
}
319323
// Run all tasks parallel
320324
run_parallel_async(tasks, num_of_tests).await?;
@@ -358,6 +362,7 @@ async fn run_parallel_async(
358362

359363
async fn run_file_async(
360364
client_type: &ClientType,
365+
bench: bool,
361366
filename: impl AsRef<Path>,
362367
) -> std::result::Result<Vec<TestError>, TestError> {
363368
let start = Instant::now();
@@ -374,6 +379,16 @@ async fn run_file_async(
374379
}
375380
// Capture error record and continue to run next records
376381
if let Err(e) = runner.run_async(record).await {
382+
// Skip query result error in bench
383+
if bench
384+
&& matches!(
385+
e.kind(),
386+
sqllogictest::TestErrorKind::QueryResultMismatch { .. }
387+
)
388+
{
389+
continue;
390+
}
391+
377392
if no_fail_fast {
378393
error_records.push(e);
379394
} else {

tests/sqllogictests/suites/mode/standalone/explain/limit.test

Lines changed: 71 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -80,87 +80,91 @@ Limit
8080
├── output columns: [t.number (#0)]
8181
├── limit: 3
8282
├── offset: 0
83-
├── estimated rows: 1.00
83+
├── estimated rows: 0.20
8484
└── Sort
8585
├── output columns: [t.number (#0)]
8686
├── sort keys: [number DESC NULLS LAST]
87-
├── estimated rows: 1.00
87+
├── estimated rows: 0.20
8888
└── AggregateFinal
8989
├── output columns: [t.number (#0)]
9090
├── group by: [number]
9191
├── aggregate functions: []
92-
├── estimated rows: 1.00
92+
├── estimated rows: 0.20
9393
└── AggregatePartial
9494
├── group by: [number]
9595
├── aggregate functions: []
96-
├── estimated rows: 1.00
96+
├── estimated rows: 0.20
9797
├── rank limit: 3
98-
└── HashJoin
98+
└── Filter
9999
├── output columns: [t.number (#0)]
100-
├── join type: INNER
101-
├── build keys: [number (#2), if(true, TRY_CAST(scalar_subquery_4 (#4) AS UInt64 NULL), 0)]
102-
├── probe keys: [number (#0), CAST(t.number (#0) AS UInt64 NULL)]
103-
├── filters: []
104-
├── estimated rows: 1.00
105-
├── AggregateFinal(Build)
106-
│ ├── output columns: [COUNT(*) (#4), t2.number (#2)]
107-
│ ├── group by: [number]
108-
│ ├── aggregate functions: [count()]
109-
│ ├── estimated rows: 1.00
110-
│ └── AggregatePartial
111-
│ ├── group by: [number]
112-
│ ├── aggregate functions: [count()]
113-
│ ├── estimated rows: 1.00
114-
│ └── HashJoin
115-
│ ├── output columns: [t2.number (#2)]
116-
│ ├── join type: CROSS
117-
│ ├── build keys: []
118-
│ ├── probe keys: []
119-
│ ├── filters: []
120-
│ ├── estimated rows: 1.00
121-
│ ├── TableScan(Build)
122-
│ │ ├── table: default.system.numbers
123-
│ │ ├── output columns: []
124-
│ │ ├── read rows: 1
125-
│ │ ├── read size: < 1 KiB
126-
│ │ ├── partitions total: 1
127-
│ │ ├── partitions scanned: 1
128-
│ │ ├── push downs: [filters: [], limit: NONE]
129-
│ │ └── estimated rows: 1.00
130-
│ └── TableScan(Probe)
131-
│ ├── table: default.system.numbers
132-
│ ├── output columns: [number (#2)]
133-
│ ├── read rows: 1
134-
│ ├── read size: < 1 KiB
135-
│ ├── partitions total: 1
136-
│ ├── partitions scanned: 1
137-
│ ├── push downs: [filters: [], limit: NONE]
138-
│ └── estimated rows: 1.00
139-
└── HashJoin(Probe)
140-
├── output columns: [t.number (#0)]
141-
├── join type: CROSS
142-
├── build keys: []
143-
├── probe keys: []
100+
├── filters: [is_true(CAST(t.number (#0) AS UInt64 NULL) = if(CAST(is_not_null(scalar_subquery_4 (#4)) AS Boolean NULL), scalar_subquery_4 (#4), 0))]
101+
├── estimated rows: 0.20
102+
└── HashJoin
103+
├── output columns: [t.number (#0), COUNT(*) (#4)]
104+
├── join type: LEFT OUTER
105+
├── build keys: [number (#2)]
106+
├── probe keys: [CAST(number (#0) AS UInt64 NULL)]
144107
├── filters: []
145108
├── estimated rows: 1.00
146-
├── TableScan(Build)
147-
│ ├── table: default.system.numbers
148-
│ ├── output columns: []
149-
│ ├── read rows: 1
150-
│ ├── read size: < 1 KiB
151-
│ ├── partitions total: 1
152-
│ ├── partitions scanned: 1
153-
│ ├── push downs: [filters: [], limit: NONE]
154-
│ └── estimated rows: 1.00
155-
└── TableScan(Probe)
156-
├── table: default.system.numbers
157-
├── output columns: [number (#0)]
158-
├── read rows: 1
159-
├── read size: < 1 KiB
160-
├── partitions total: 1
161-
├── partitions scanned: 1
162-
├── push downs: [filters: [], limit: NONE]
163-
└── estimated rows: 1.00
109+
├── AggregateFinal(Build)
110+
│ ├── output columns: [COUNT(*) (#4), t2.number (#2)]
111+
│ ├── group by: [number]
112+
│ ├── aggregate functions: [count()]
113+
│ ├── estimated rows: 1.00
114+
│ └── AggregatePartial
115+
│ ├── group by: [number]
116+
│ ├── aggregate functions: [count()]
117+
│ ├── estimated rows: 1.00
118+
│ └── HashJoin
119+
│ ├── output columns: [t2.number (#2)]
120+
│ ├── join type: CROSS
121+
│ ├── build keys: []
122+
│ ├── probe keys: []
123+
│ ├── filters: []
124+
│ ├── estimated rows: 1.00
125+
│ ├── TableScan(Build)
126+
│ │ ├── table: default.system.numbers
127+
│ │ ├── output columns: []
128+
│ │ ├── read rows: 1
129+
│ │ ├── read size: < 1 KiB
130+
│ │ ├── partitions total: 1
131+
│ │ ├── partitions scanned: 1
132+
│ │ ├── push downs: [filters: [], limit: NONE]
133+
│ │ └── estimated rows: 1.00
134+
│ └── TableScan(Probe)
135+
│ ├── table: default.system.numbers
136+
│ ├── output columns: [number (#2)]
137+
│ ├── read rows: 1
138+
│ ├── read size: < 1 KiB
139+
│ ├── partitions total: 1
140+
│ ├── partitions scanned: 1
141+
│ ├── push downs: [filters: [], limit: NONE]
142+
│ └── estimated rows: 1.00
143+
└── HashJoin(Probe)
144+
├── output columns: [t.number (#0)]
145+
├── join type: CROSS
146+
├── build keys: []
147+
├── probe keys: []
148+
├── filters: []
149+
├── estimated rows: 1.00
150+
├── TableScan(Build)
151+
│ ├── table: default.system.numbers
152+
│ ├── output columns: []
153+
│ ├── read rows: 1
154+
│ ├── read size: < 1 KiB
155+
│ ├── partitions total: 1
156+
│ ├── partitions scanned: 1
157+
│ ├── push downs: [filters: [], limit: NONE]
158+
│ └── estimated rows: 1.00
159+
└── TableScan(Probe)
160+
├── table: default.system.numbers
161+
├── output columns: [number (#0)]
162+
├── read rows: 1
163+
├── read size: < 1 KiB
164+
├── partitions total: 1
165+
├── partitions scanned: 1
166+
├── push downs: [filters: [], limit: NONE]
167+
└── estimated rows: 1.00
164168

165169
query T
166170
explain select * from (select count(t1.number) as c1 from numbers(1) as t1 group by number) as t3 left join (select count(t.number) as c from numbers(2) as t group by number) as t4 on t3.c1=t4.c order by t3.c1 limit 1

0 commit comments

Comments
 (0)