diff --git a/pkg/planner/core/main_test.go b/pkg/planner/core/main_test.go index 8a589751a2237..a2b7e06ac6cf7 100644 --- a/pkg/planner/core/main_test.go +++ b/pkg/planner/core/main_test.go @@ -36,6 +36,7 @@ func TestMain(m *testing.M) { testDataMap.LoadTestSuiteData("testdata", "index_merge_suite", true) testDataMap.LoadTestSuiteData("testdata", "runtime_filter_generator_suite") testDataMap.LoadTestSuiteData("testdata", "plan_cache_suite") + testDataMap.LoadTestSuiteData("testdata", "decorrelate_limit_suite", true) indexMergeSuiteData = testDataMap["index_merge_suite"] planSuiteUnexportedData = testDataMap["plan_suite_unexported"] @@ -72,3 +73,11 @@ func GetIndexMergeSuiteData() testdata.TestData { func GetRuntimeFilterGeneratorData() testdata.TestData { return testDataMap["runtime_filter_generator_suite"] } + +func GetDecorrelateLimitSuiteData() testdata.TestData { + return testDataMap["decorrelate_limit_suite"] +} + +func GetCascadesSuiteData() testdata.TestData { + return testDataMap["cascades_suite"] +} diff --git a/pkg/planner/core/plan_test.go b/pkg/planner/core/plan_test.go index cbd923c474471..403de0b85bb29 100644 --- a/pkg/planner/core/plan_test.go +++ b/pkg/planner/core/plan_test.go @@ -36,6 +36,7 @@ import ( "github.com/pingcap/tidb/pkg/planner/util/coretestsdk" "github.com/pingcap/tidb/pkg/sessionctx/variable" "github.com/pingcap/tidb/pkg/testkit" + "github.com/pingcap/tidb/pkg/testkit/testdata" "github.com/pingcap/tidb/pkg/types" "github.com/pingcap/tidb/pkg/util/dbterror/plannererrors" "github.com/pingcap/tidb/pkg/util/plancodec" @@ -732,3 +733,26 @@ func TestImportIntoBuildPlan(t *testing.T) { require.ErrorIs(t, tk.ExecToErr("IMPORT INTO t3 FROM select * from t2"), infoschema.ErrTableNotExists) } + +func TestDecorrelateLimitOptimization(t *testing.T) { + testkit.RunTestUnderCascadesWithDomain(t, func(t *testing.T, testKit *testkit.TestKit, dom *domain.Domain, cascades, caller string) { + testKit.MustExec("use test") + testKit.MustExec("CREATE TABLE IF NOT EXISTS employees (\n id INT PRIMARY KEY,\n name VARCHAR(50),\n dept_id INT,\n salary DECIMAL(10, 2),\n alias VARCHAR(50)\n)") + testKit.MustExec("CREATE TABLE IF NOT EXISTS employee_notes (\n id INT PRIMARY KEY,\n employee_id INT,\n note TEXT,\n created_at TIMESTAMP,\n INDEX idx_employee_id (employee_id)\n)") + var input []string + var output []struct { + SQL string + Plan []string + } + decorrelateLimitSuiteData := core.GetDecorrelateLimitSuiteData() + decorrelateLimitSuiteData.LoadTestCases(t, &input, &output, cascades, caller) + for i, sql := range input { + plan := testKit.MustQuery(sql) + testdata.OnRecord(func() { + output[i].SQL = sql + output[i].Plan = testdata.ConvertRowsToStrings(plan.Rows()) + }) + plan.Check(testkit.Rows(output[i].Plan...)) + } + }) +} diff --git a/pkg/planner/core/rule_decorrelate.go b/pkg/planner/core/rule_decorrelate.go index 75db59192e68a..7af4012c9b9e5 100644 --- a/pkg/planner/core/rule_decorrelate.go +++ b/pkg/planner/core/rule_decorrelate.go @@ -234,218 +234,267 @@ func (s *DecorrelateSolver) optimize(ctx context.Context, p base.LogicalPlan, gr p = join } else if apply.NoDecorrelate { goto NoOptimize - } else if sel, ok := innerPlan.(*logicalop.LogicalSelection); ok { - // If the inner plan is a selection, we add this condition to join predicates. - // Notice that no matter what kind of join is, it's always right. - newConds := make([]expression.Expression, 0, len(sel.Conditions)) - for _, cond := range sel.Conditions { - newConds = append(newConds, cond.Decorrelate(outerPlan.Schema())) - } - apply.AttachOnConds(newConds) - innerPlan = sel.Children()[0] - apply.SetChildren(outerPlan, innerPlan) - return s.optimize(ctx, p, groupByColumn) - } else if m, ok := innerPlan.(*logicalop.LogicalMaxOneRow); ok { - if m.Children()[0].MaxOneRow() { - innerPlan = m.Children()[0] + } else { + switch innerPlanTyped := innerPlan.(type) { + case *logicalop.LogicalSelection: + // If the inner plan is a selection, we add this condition to join predicates. + // Notice that no matter what kind of join is, it's always right. + newConds := make([]expression.Expression, 0, len(innerPlanTyped.Conditions)) + for _, cond := range innerPlanTyped.Conditions { + newConds = append(newConds, cond.Decorrelate(outerPlan.Schema())) + } + apply.AttachOnConds(newConds) + innerPlan = innerPlanTyped.Children()[0] apply.SetChildren(outerPlan, innerPlan) return s.optimize(ctx, p, groupByColumn) - } - } else if proj, ok := innerPlan.(*logicalop.LogicalProjection); ok { - // After the column pruning, some expressions in the projection operator may be pruned. - // In this situation, we can decorrelate the apply operator. - if apply.JoinType == base.LeftOuterJoin { - if skipDecorrelateProjectionForLeftOuterApply(apply, proj) { - goto NoOptimize + case *logicalop.LogicalMaxOneRow: + // Check if MaxOneRow's child is Limit or TopN, and if we can remove it for LeftOuterJoin + // Also handle the case where there's a Projection between MaxOneRow and Limit: MaxOneRow -> Projection -> Limit + if apply.JoinType == base.LeftOuterJoin { + mChild := innerPlanTyped.Children()[0] + var removePlan base.LogicalPlan + var canRemove bool + switch mChild := mChild.(type) { + case *logicalop.LogicalLimit: + // Limit with non-0 offset cannot be removed, but we still check for redundant MaxOneRow + if mChild.Offset != 0 { + canRemove = false + } else { + // Check if join key is unique key + removePlan = mChild.Children()[0] + if isJoinKeyUniqueKey(apply, removePlan) { + canRemove = true + } + } + case *logicalop.LogicalProjection: + // Check if Projection's child is Limit: MaxOneRow -> Projection -> Limit + // This pattern occurs when subqueries contain some clauses like ORDER BY or HAVING clauses that require projection. + // Examples: + // - HAVING clause: SELECT ... (SELECT AVG(...) FROM ... GROUP BY ... HAVING ... LIMIT 1) + // - ORDER BY clause: SELECT ... (SELECT ... FROM ... ORDER BY ... LIMIT 1) + if li, ok := mChild.Children()[0].(*logicalop.LogicalLimit); ok { + // Limit with non-0 offset cannot be removed, but we still check for redundant MaxOneRow + if li.Offset != 0 { + canRemove = false + } else { + // Check if join key is unique key + removePlan = li.Children()[0] + if isJoinKeyUniqueKey(apply, removePlan) { + canRemove = true + } + } + } + } + // If LIMIT can be removed (join key is unique key), remove it and re-enter decorrelate solver + if canRemove { + apply.SetChildren(outerPlan, removePlan) + return s.optimize(ctx, p, groupByColumn) + } + } + // If child is already MaxOneRow, remove redundant wrapper + if innerPlanTyped.Children()[0].MaxOneRow() { + innerPlan = innerPlanTyped.Children()[0] + apply.SetChildren(outerPlan, innerPlan) + return s.optimize(ctx, p, groupByColumn) + } + case *logicalop.LogicalProjection: + proj := innerPlanTyped + // After the column pruning, some expressions in the projection operator may be pruned. + // In this situation, we can decorrelate the apply operator. + if apply.JoinType == base.LeftOuterJoin { + if skipDecorrelateProjectionForLeftOuterApply(apply, proj) { + goto NoOptimize + } } - } - // step1: substitute the all the schema with new expressions (including correlated column maybe, but it doesn't affect the collation infer inside) - // eg: projection: constant("guo") --> column8, once upper layer substitution failed here, the lower layer behind - // projection can't supply column8 anymore. - // - // upper OP (depend on column8) --> projection(constant "guo" --> column8) --> lower layer OP - // | ^ - // +-------------------------------------------------------+ - // - // upper OP (depend on column8) --> lower layer OP - // | ^ - // +-----------------------------+ // Fail: lower layer can't supply column8 anymore. - hasFail := apply.ColumnSubstituteAll(proj.Schema(), proj.Exprs) - if hasFail { - goto NoOptimize - } - // step2: when it can be substituted all, we then just do the de-correlation (apply conditions included). - for i, expr := range proj.Exprs { - proj.Exprs[i] = expr.Decorrelate(outerPlan.Schema()) - } - apply.Decorrelate(outerPlan.Schema()) - - innerPlan = proj.Children()[0] - apply.SetChildren(outerPlan, innerPlan) - if apply.JoinType != base.SemiJoin && apply.JoinType != base.LeftOuterSemiJoin && apply.JoinType != base.AntiSemiJoin && apply.JoinType != base.AntiLeftOuterSemiJoin { - proj.SetSchema(apply.Schema()) - proj.Exprs = append(expression.Column2Exprs(outerPlan.Schema().Clone().Columns), proj.Exprs...) - apply.SetSchema(expression.MergeSchema(outerPlan.Schema(), innerPlan.Schema())) - np, planChanged, err := s.optimize(ctx, p, groupByColumn) - if err != nil { - return nil, planChanged, err + // step1: substitute the all the schema with new expressions (including correlated column maybe, but it doesn't affect the collation infer inside) + // eg: projection: constant("guo") --> column8, once upper layer substitution failed here, the lower layer behind + // projection can't supply column8 anymore. + // + // upper OP (depend on column8) --> projection(constant "guo" --> column8) --> lower layer OP + // | ^ + // +-------------------------------------------------------+ + // + // upper OP (depend on column8) --> lower layer OP + // | ^ + // +-----------------------------+ // Fail: lower layer can't supply column8 anymore. + hasFail := apply.ColumnSubstituteAll(proj.Schema(), proj.Exprs) + if hasFail { + goto NoOptimize } - proj.SetChildren(np) - return proj, planChanged, nil - } - return s.optimize(ctx, p, groupByColumn) - } else if li, ok := innerPlan.(*logicalop.LogicalLimit); ok { - // The presence of 'limit' in 'exists' will make the plan not optimal, so we need to decorrelate the 'limit' of subquery in optimization. - // e.g. select count(*) from test t1 where exists (select value from test t2 where t1.id = t2.id limit 1); When using 'limit' in subquery, the plan will not optimal. - // If apply is not SemiJoin, the output of it might be expanded even though we are `limit 1`. - if apply.JoinType != base.SemiJoin && apply.JoinType != base.LeftOuterSemiJoin && apply.JoinType != base.AntiSemiJoin && apply.JoinType != base.AntiLeftOuterSemiJoin { - goto NoOptimize - } - // If subquery has some filter condition, we will not optimize limit. - if len(apply.LeftConditions) > 0 || len(apply.RightConditions) > 0 || len(apply.OtherConditions) > 0 || len(apply.EqualConditions) > 0 { - goto NoOptimize - } - // Limit with non-0 offset will conduct an impact of itself on the final result set from its sub-child, consequently determining the bool value of the exist subquery. - if li.Offset == 0 { - innerPlan = li.Children()[0] - apply.SetChildren(outerPlan, innerPlan) - return s.optimize(ctx, p, groupByColumn) - } - } else if agg, ok := innerPlan.(*logicalop.LogicalAggregation); ok { - if apply.CanPullUpAgg() && agg.CanPullUp() { - innerPlan = agg.Children()[0] - apply.JoinType = base.LeftOuterJoin - apply.SetChildren(outerPlan, innerPlan) - agg.SetSchema(apply.Schema()) - agg.GroupByItems = expression.Column2Exprs(outerPlan.Schema().PKOrUK[0]) - newAggFuncs := make([]*aggregation.AggFuncDesc, 0, apply.Schema().Len()) + // step2: when it can be substituted all, we then just do the de-correlation (apply conditions included). + for i, expr := range proj.Exprs { + proj.Exprs[i] = expr.Decorrelate(outerPlan.Schema()) + } + apply.Decorrelate(outerPlan.Schema()) - outerColsInSchema := make([]*expression.Column, 0, outerPlan.Schema().Len()) - for i, col := range outerPlan.Schema().Columns { - first, err := aggregation.NewAggFuncDesc(agg.SCtx().GetExprCtx(), ast.AggFuncFirstRow, []expression.Expression{col}, false) + innerPlan = proj.Children()[0] + apply.SetChildren(outerPlan, innerPlan) + if apply.JoinType != base.SemiJoin && apply.JoinType != base.LeftOuterSemiJoin && apply.JoinType != base.AntiSemiJoin && apply.JoinType != base.AntiLeftOuterSemiJoin { + proj.SetSchema(apply.Schema()) + proj.Exprs = append(expression.Column2Exprs(outerPlan.Schema().Clone().Columns), proj.Exprs...) + apply.SetSchema(expression.MergeSchema(outerPlan.Schema(), innerPlan.Schema())) + np, planChanged, err := s.optimize(ctx, p, groupByColumn) if err != nil { return nil, planChanged, err } - newAggFuncs = append(newAggFuncs, first) - - outerCol, _ := outerPlan.Schema().Columns[i].Clone().(*expression.Column) - outerCol.RetType = first.RetTp - outerColsInSchema = append(outerColsInSchema, outerCol) + proj.SetChildren(np) + return proj, planChanged, nil } - apply.SetSchema(expression.MergeSchema(expression.NewSchema(outerColsInSchema...), innerPlan.Schema())) - util.ResetNotNullFlag(apply.Schema(), outerPlan.Schema().Len(), apply.Schema().Len()) - for i, aggFunc := range agg.AggFuncs { - aggArgs := make([]expression.Expression, 0, len(aggFunc.Args)) - for _, arg := range aggFunc.Args { - switch expr := arg.(type) { - case *expression.Column: - if idx := apply.Schema().ColumnIndex(expr); idx != -1 { - aggArgs = append(aggArgs, apply.Schema().Columns[idx]) - } else { + return s.optimize(ctx, p, groupByColumn) + case *logicalop.LogicalLimit: + // The presence of 'limit' in 'exists' will make the plan not optimal, so we need to decorrelate the 'limit' of subquery in optimization. + // e.g. select count(*) from test t1 where exists (select value from test t2 where t1.id = t2.id limit 1); When using 'limit' in subquery, the plan will not optimal. + // If apply is not SemiJoin, the output of it might be expanded even though we are `limit 1`. + if apply.JoinType != base.SemiJoin && apply.JoinType != base.LeftOuterSemiJoin && apply.JoinType != base.AntiSemiJoin && apply.JoinType != base.AntiLeftOuterSemiJoin { + goto NoOptimize + } + // If subquery has some filter condition, we will not optimize limit. + if len(apply.LeftConditions) > 0 || len(apply.RightConditions) > 0 || len(apply.OtherConditions) > 0 || len(apply.EqualConditions) > 0 { + goto NoOptimize + } + // Limit with non-0 offset will conduct an impact of itself on the final result set from its sub-child, consequently determining the bool value of the exist subquery. + if innerPlanTyped.Offset == 0 { + innerPlan = innerPlanTyped.Children()[0] + apply.SetChildren(outerPlan, innerPlan) + return s.optimize(ctx, p, groupByColumn) + } + case *logicalop.LogicalAggregation: + agg := innerPlanTyped + if apply.CanPullUpAgg() && agg.CanPullUp() { + innerPlan = agg.Children()[0] + apply.JoinType = base.LeftOuterJoin + apply.SetChildren(outerPlan, innerPlan) + agg.SetSchema(apply.Schema()) + agg.GroupByItems = expression.Column2Exprs(outerPlan.Schema().PKOrUK[0]) + newAggFuncs := make([]*aggregation.AggFuncDesc, 0, apply.Schema().Len()) + + outerColsInSchema := make([]*expression.Column, 0, outerPlan.Schema().Len()) + for i, col := range outerPlan.Schema().Columns { + first, err := aggregation.NewAggFuncDesc(agg.SCtx().GetExprCtx(), ast.AggFuncFirstRow, []expression.Expression{col}, false) + if err != nil { + return nil, planChanged, err + } + newAggFuncs = append(newAggFuncs, first) + + outerCol, _ := outerPlan.Schema().Columns[i].Clone().(*expression.Column) + outerCol.RetType = first.RetTp + outerColsInSchema = append(outerColsInSchema, outerCol) + } + apply.SetSchema(expression.MergeSchema(expression.NewSchema(outerColsInSchema...), innerPlan.Schema())) + util.ResetNotNullFlag(apply.Schema(), outerPlan.Schema().Len(), apply.Schema().Len()) + for i, aggFunc := range agg.AggFuncs { + aggArgs := make([]expression.Expression, 0, len(aggFunc.Args)) + for _, arg := range aggFunc.Args { + switch expr := arg.(type) { + case *expression.Column: + if idx := apply.Schema().ColumnIndex(expr); idx != -1 { + aggArgs = append(aggArgs, apply.Schema().Columns[idx]) + } else { + aggArgs = append(aggArgs, expr) + } + case *expression.ScalarFunction: + expr.RetType = expr.RetType.Clone() + expr.RetType.DelFlag(mysql.NotNullFlag) + aggArgs = append(aggArgs, expr) + default: aggArgs = append(aggArgs, expr) } - case *expression.ScalarFunction: - expr.RetType = expr.RetType.Clone() - expr.RetType.DelFlag(mysql.NotNullFlag) - aggArgs = append(aggArgs, expr) - default: - aggArgs = append(aggArgs, expr) } + desc, err := aggregation.NewAggFuncDesc(agg.SCtx().GetExprCtx(), agg.AggFuncs[i].Name, aggArgs, agg.AggFuncs[i].HasDistinct) + if err != nil { + return nil, planChanged, err + } + newAggFuncs = append(newAggFuncs, desc) } - desc, err := aggregation.NewAggFuncDesc(agg.SCtx().GetExprCtx(), agg.AggFuncs[i].Name, aggArgs, agg.AggFuncs[i].HasDistinct) + agg.AggFuncs = newAggFuncs + np, planChanged, err := s.optimize(ctx, p, groupByColumn) if err != nil { return nil, planChanged, err } - newAggFuncs = append(newAggFuncs, desc) - } - agg.AggFuncs = newAggFuncs - np, planChanged, err := s.optimize(ctx, p, groupByColumn) - if err != nil { - return nil, planChanged, err + agg.SetChildren(np) + // TODO: Add a Projection if any argument of aggregate funcs or group by items are scalar functions. + // agg.buildProjectionIfNecessary() + return agg, planChanged, nil } - agg.SetChildren(np) - // TODO: Add a Projection if any argument of aggregate funcs or group by items are scalar functions. - // agg.buildProjectionIfNecessary() - return agg, planChanged, nil - } - // We can pull up the equal conditions below the aggregation as the join key of the apply, if only - // the equal conditions contain the correlated column of this apply. - if sel, ok := agg.Children()[0].(*logicalop.LogicalSelection); ok && apply.JoinType == base.LeftOuterJoin { - var ( - eqCondWithCorCol []*expression.ScalarFunction - remainedExpr []expression.Expression - ) - // Extract the equal condition. - for _, cond := range sel.Conditions { - if expr := apply.DeCorColFromEqExpr(cond); expr != nil { - eqCondWithCorCol = append(eqCondWithCorCol, expr.(*expression.ScalarFunction)) - } else { - remainedExpr = append(remainedExpr, cond) + // We can pull up the equal conditions below the aggregation as the join key of the apply, if only + // the equal conditions contain the correlated column of this apply. + if sel, ok := agg.Children()[0].(*logicalop.LogicalSelection); ok && apply.JoinType == base.LeftOuterJoin { + var ( + eqCondWithCorCol []*expression.ScalarFunction + remainedExpr []expression.Expression + ) + // Extract the equal condition. + for _, cond := range sel.Conditions { + if expr := apply.DeCorColFromEqExpr(cond); expr != nil { + eqCondWithCorCol = append(eqCondWithCorCol, expr.(*expression.ScalarFunction)) + } else { + remainedExpr = append(remainedExpr, cond) + } } - } - if len(eqCondWithCorCol) > 0 { - originalExpr := sel.Conditions - sel.Conditions = remainedExpr - apply.CorCols = coreusage.ExtractCorColumnsBySchema4LogicalPlan(apply.Children()[1], apply.Children()[0].Schema()) - // There's no other correlated column. - groupByCols := expression.NewSchema(agg.GetGroupByCols()...) - if len(apply.CorCols) == 0 { - appendedGroupByCols := expression.NewSchema() - var appendedAggFuncs []*aggregation.AggFuncDesc - - join := &apply.LogicalJoin - join.EqualConditions = append(join.EqualConditions, eqCondWithCorCol...) - for _, eqCond := range eqCondWithCorCol { - clonedCol := eqCond.GetArgs()[1].(*expression.Column) - // If the join key is not in the aggregation's schema, add first row function. - if agg.Schema().ColumnIndex(eqCond.GetArgs()[1].(*expression.Column)) == -1 { - newFunc, err := aggregation.NewAggFuncDesc(apply.SCtx().GetExprCtx(), ast.AggFuncFirstRow, []expression.Expression{clonedCol}, false) - if err != nil { - return nil, planChanged, err + if len(eqCondWithCorCol) > 0 { + originalExpr := sel.Conditions + sel.Conditions = remainedExpr + apply.CorCols = coreusage.ExtractCorColumnsBySchema4LogicalPlan(apply.Children()[1], apply.Children()[0].Schema()) + // There's no other correlated column. + groupByCols := expression.NewSchema(agg.GetGroupByCols()...) + if len(apply.CorCols) == 0 { + appendedGroupByCols := expression.NewSchema() + var appendedAggFuncs []*aggregation.AggFuncDesc + + join := &apply.LogicalJoin + join.EqualConditions = append(join.EqualConditions, eqCondWithCorCol...) + for _, eqCond := range eqCondWithCorCol { + clonedCol := eqCond.GetArgs()[1].(*expression.Column) + // If the join key is not in the aggregation's schema, add first row function. + if agg.Schema().ColumnIndex(eqCond.GetArgs()[1].(*expression.Column)) == -1 { + newFunc, err := aggregation.NewAggFuncDesc(apply.SCtx().GetExprCtx(), ast.AggFuncFirstRow, []expression.Expression{clonedCol}, false) + if err != nil { + return nil, planChanged, err + } + agg.AggFuncs = append(agg.AggFuncs, newFunc) + agg.Schema().Append(clonedCol) + agg.Schema().Columns[agg.Schema().Len()-1].RetType = newFunc.RetTp + appendedAggFuncs = append(appendedAggFuncs, newFunc) + } + // If group by cols don't contain the join key, add it into this. + if !groupByCols.Contains(clonedCol) { + agg.GroupByItems = append(agg.GroupByItems, clonedCol) + groupByCols.Append(clonedCol) + appendedGroupByCols.Append(clonedCol) } - agg.AggFuncs = append(agg.AggFuncs, newFunc) - agg.Schema().Append(clonedCol) - agg.Schema().Columns[agg.Schema().Len()-1].RetType = newFunc.RetTp - appendedAggFuncs = append(appendedAggFuncs, newFunc) } - // If group by cols don't contain the join key, add it into this. - if !groupByCols.Contains(clonedCol) { - agg.GroupByItems = append(agg.GroupByItems, clonedCol) - groupByCols.Append(clonedCol) - appendedGroupByCols.Append(clonedCol) + // The selection may be useless, check and remove it. + if len(sel.Conditions) == 0 { + agg.SetChildren(sel.Children()[0]) } - } - // The selection may be useless, check and remove it. - if len(sel.Conditions) == 0 { - agg.SetChildren(sel.Children()[0]) - } - defaultValueMap := s.aggDefaultValueMap(agg) - // We should use it directly, rather than building a projection. - if len(defaultValueMap) > 0 { - proj := logicalop.LogicalProjection{}.Init(agg.SCtx(), agg.QueryBlockOffset()) - proj.SetSchema(apply.Schema()) - proj.Exprs = expression.Column2Exprs(apply.Schema().Columns) - for i, val := range defaultValueMap { - pos := proj.Schema().ColumnIndex(agg.Schema().Columns[i]) - ifNullFunc := expression.NewFunctionInternal(agg.SCtx().GetExprCtx(), ast.Ifnull, types.NewFieldType(mysql.TypeLonglong), agg.Schema().Columns[i], val) - proj.Exprs[pos] = ifNullFunc + defaultValueMap := s.aggDefaultValueMap(agg) + // We should use it directly, rather than building a projection. + if len(defaultValueMap) > 0 { + proj := logicalop.LogicalProjection{}.Init(agg.SCtx(), agg.QueryBlockOffset()) + proj.SetSchema(apply.Schema()) + proj.Exprs = expression.Column2Exprs(apply.Schema().Columns) + for i, val := range defaultValueMap { + pos := proj.Schema().ColumnIndex(agg.Schema().Columns[i]) + ifNullFunc := expression.NewFunctionInternal(agg.SCtx().GetExprCtx(), ast.Ifnull, types.NewFieldType(mysql.TypeLonglong), agg.Schema().Columns[i], val) + proj.Exprs[pos] = ifNullFunc + } + proj.SetChildren(apply) + p = proj } - proj.SetChildren(apply) - p = proj + return s.optimize(ctx, p, groupByColumn) } - return s.optimize(ctx, p, groupByColumn) + sel.Conditions = originalExpr + apply.CorCols = coreusage.ExtractCorColumnsBySchema4LogicalPlan(apply.Children()[1], apply.Children()[0].Schema()) } - sel.Conditions = originalExpr - apply.CorCols = coreusage.ExtractCorColumnsBySchema4LogicalPlan(apply.Children()[1], apply.Children()[0].Schema()) } + case *logicalop.LogicalSort: + // Since we only pull up Selection, Projection, Aggregation, MaxOneRow, + // the top level Sort has no effect on the subquery's result. + innerPlan = innerPlanTyped.Children()[0] + apply.SetChildren(outerPlan, innerPlan) + return s.optimize(ctx, p, groupByColumn) } - } else if sort, ok := innerPlan.(*logicalop.LogicalSort); ok { - // Since we only pull up Selection, Projection, Aggregation, MaxOneRow, - // the top level Sort has no effect on the subquery's result. - innerPlan = sort.Children()[0] - apply.SetChildren(outerPlan, innerPlan) - return s.optimize(ctx, p, groupByColumn) } } NoOptimize: @@ -470,6 +519,160 @@ func (*DecorrelateSolver) Name() string { return "decorrelate" } +// extractJoinKeyFromCondition extracts the inner join key column from an equality condition. +// It checks if the condition is of the form "outer_col = inner_col" where outer_col is a correlated column +// from the Apply operator. Returns the inner column if it belongs to the given schema, otherwise returns nil. +func extractJoinKeyFromCondition(apply *logicalop.LogicalApply, cond expression.Expression, schema *expression.Schema) *expression.Column { + decExpr := apply.DeCorColFromEqExpr(cond) + if decExpr == nil { + return nil + } + sf, ok := decExpr.(*expression.ScalarFunction) + if !ok || sf.FuncName.L != ast.EQ { + return nil + } + args := sf.GetArgs() + if len(args) != 2 { + return nil + } + innerCol, ok := args[1].(*expression.Column) + if !ok || !schema.Contains(innerCol) { + return nil + } + return innerCol +} + +// findDataSource recursively finds the underlying DataSource in a logical plan tree. +// It returns the first DataSource found, or nil if none exists. +func findDataSource(p base.LogicalPlan) *logicalop.DataSource { + if ds, ok := p.(*logicalop.DataSource); ok { + return ds + } + for _, child := range p.Children() { + if ds := findDataSource(child); ds != nil { + return ds + } + } + return nil +} + +// canGenerateMultipleRows returns if the LogicalPlan can generate multiple rows from a single input row. +// This is used to check if an operator can expand rows, which affects uniqueness constraints. +// Operators that can generate multiple rows include: +// - JOIN (except semi/anti joins which preserve row count) +// - UNION ALL +// - PartitionUnionAll +// - Expand (for GROUPING SETS/ROLLUP) +// - TODO: unnest function when implemented +func canGenerateMultipleRows(p base.LogicalPlan) bool { + switch p.(type) { + case *logicalop.LogicalJoin: + // JOIN operators can generate multiple rows (Cartesian product effect) + // Note: Semi/Anti joins preserve row count, but we return true here for safety + // as the caller can refine this check if needed + return true + case *logicalop.LogicalUnionAll, *logicalop.LogicalPartitionUnionAll: + // UNION ALL combines multiple inputs, potentially generating multiple rows + return true + case *logicalop.LogicalExpand: + // Expand operator splits rows for GROUPING SETS/ROLLUP + return true + } + return false +} + +// isJoinKeyUniqueKey checks if join key is unique key. +// Returns true if the join key forms a unique key constraint. +func isJoinKeyUniqueKey(apply *logicalop.LogicalApply, plan base.LogicalPlan) bool { + var hasMultiRowOperator func(base.LogicalPlan) bool + hasMultiRowOperator = func(p base.LogicalPlan) bool { + // Use centralized function to check if operator can generate multiple rows + if canGenerateMultipleRows(p) { + return true + } + // Recursively check children + for _, child := range p.Children() { + if hasMultiRowOperator(child) { + return true + } + } + return false + } + if hasMultiRowOperator(plan) { + return false + } + + // Extract join keys from Selection conditions and their children recursively + // Join conditions may be pushed down to DataSource or nested in child Selection nodes + innerJoinKeys := make([]*expression.Column, 0) + + // Recursively extract all conditions from Selection nodes and their children + var extractConditions func(base.LogicalPlan) + extractConditions = func(p base.LogicalPlan) { + if sel, ok := p.(*logicalop.LogicalSelection); ok { + // Check conditions directly on Selection + for _, cond := range sel.Conditions { + if innerCol := extractJoinKeyFromCondition(apply, cond, sel.Schema()); innerCol != nil { + innerJoinKeys = append(innerJoinKeys, innerCol) + } + } + // Continue to check children recursively + } else if ds, ok := p.(*logicalop.DataSource); ok { + // Check conditions in DataSource (PushedDownConds may contain join key conditions) + for _, cond := range ds.PushedDownConds { + if innerCol := extractJoinKeyFromCondition(apply, cond, ds.Schema()); innerCol != nil { + innerJoinKeys = append(innerJoinKeys, innerCol) + } + } + // Stop recursion at DataSource + return + } + // Continue recursion for other nodes + for _, child := range p.Children() { + extractConditions(child) + } + } + + extractConditions(plan) + if len(innerJoinKeys) == 0 { + return false + } + + // Find the underlying DataSource to get PKOrUK + ds := findDataSource(plan) + if ds == nil { + return false + } + + // Use PKOrUK from DataSource Schema directly + if len(ds.Schema().PKOrUK) == 0 { + return false + } + + // Check if join keys form a unique key + for _, keyInfo := range ds.Schema().PKOrUK { + allMatch := true + for _, keyCol := range keyInfo { + found := false + for _, joinKey := range innerJoinKeys { + if keyCol.ID == joinKey.ID && keyCol.ID != 0 { + found = true + break + } + } + if !found { + allMatch = false + break + } + } + if allMatch && len(keyInfo) > 0 { + return true + } + } + + return false +} + // Return true if we should skip decorrelation for LeftOuterApply + Projection. func skipDecorrelateProjectionForLeftOuterApply(apply *logicalop.LogicalApply, proj *logicalop.LogicalProjection) bool { allConst := len(proj.Exprs) > 0 diff --git a/pkg/planner/core/testdata/decorrelate_limit_suite_in.json b/pkg/planner/core/testdata/decorrelate_limit_suite_in.json new file mode 100644 index 0000000000000..cba71e4feefff --- /dev/null +++ b/pkg/planner/core/testdata/decorrelate_limit_suite_in.json @@ -0,0 +1,22 @@ +[ + { + "Name": "TestDecorrelateLimitOptimization", + "Cases": [ + "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary , ( select salary FROM employees e2 WHERE e2.id = e.id LIMIT 1 OFFSET 0 ) AS avg_dept_salary FROM employees e WHERE e.dept_id > 1", + "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary , ( select salary FROM employees e2 WHERE e2.id = e.id LIMIT 1 OFFSET 1 ) AS avg_dept_salary FROM employees e WHERE e.dept_id > 1", + "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, ( SELECT e2.salary FROM employees e2 WHERE e2.dept_id = e.dept_id LIMIT 1 OFFSET 0) AS avg_dept_salary FROM employees e WHERE e.dept_id = 1", + "EXPLAIN format = 'plan_tree' SELECT e.id, e.name, e.salary, (SELECT en.note FROM employees e2 JOIN employee_notes en ON en.employee_id = e2.id WHERE e2.id = e.id ORDER BY en.created_at DESC LIMIT 1) AS latest_note FROM employees e", + "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary,(SELECT en.note FROM employees e2 JOIN employee_notes en ON en.employee_id = e2.id LEFT JOIN employees e3 ON e3.id = e2.dept_id WHERE e2.id = e.id LIMIT 1) AS note_multi_join FROM employees e", + "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary,(SELECT e2.salary FROM employees e2 INNER JOIN employee_notes en ON en.employee_id = e2.id WHERE e2.id = e.id LIMIT 1) AS salary_inner_join FROM employees e", + "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, (SELECT AVG(e2.salary) FROM employees e2 WHERE e2.id = e.id GROUP BY e2.dept_id HAVING AVG(e2.salary) > 1000 LIMIT 1) AS avg_salary_having FROM employees e", + "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, (SELECT count(e2.dept_id) FROM employees e2 WHERE e2.id = e.id limit 1) AS distinct_dept_id FROM employees e", + "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary,(SELECT e2.salary FROM employees e2 WHERE e2.id = e.id AND e2.dept_id IN ( SELECT dept_id FROM employees e3 WHERE e3.id = e.id LIMIT 1 ) LIMIT 1) AS salary_nested FROM employees e", + "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary,(SELECT e2.salary FROM employees e2 WHERE e2.id = e.id AND EXISTS ( SELECT 1 FROM employee_notes en WHERE en.employee_id = e2.id ) LIMIT 1) AS salary_exists FROM employees e", + "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, (SELECT e2.salary FROM employees e2 WHERE e2.id = e.id ORDER BY e2.dept_id, e2.salary DESC LIMIT 1) AS salary_order_multi FROM employees e", + "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, (SELECT DISTINCT e2.dept_id FROM employees e2 WHERE e2.id = e.id LIMIT 1) AS distinct_dept_id FROM employees e", + "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, (SELECT ROW_NUMBER() OVER (ORDER BY e2.salary DESC) FROM employees e2 WHERE e2.id = e.id LIMIT 1) AS row_num FROM employees e", + "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, (SELECT DISTINCT e2.dept_id FROM employees e2 WHERE e2.id = e.id LIMIT 1) AS distinct_dept_id FROM employees e" + ] + } +] + diff --git a/pkg/planner/core/testdata/decorrelate_limit_suite_out.json b/pkg/planner/core/testdata/decorrelate_limit_suite_out.json new file mode 100644 index 0000000000000..71b7cd5ccb3af --- /dev/null +++ b/pkg/planner/core/testdata/decorrelate_limit_suite_out.json @@ -0,0 +1,201 @@ +[ + { + "Name": "TestDecorrelateLimitOptimization", + "Cases": [ + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary , ( select salary FROM employees e2 WHERE e2.id = e.id LIMIT 1 OFFSET 0 ) AS avg_dept_salary FROM employees e WHERE e.dept_id > 1", + "Plan": [ + "MergeJoin root left outer join, left side:TableReader, left key:test.employees.id, right key:test.employees.id", + "├─TableReader(Build) root data:TableFullScan", + "│ └─TableFullScan cop[tikv] table:e2 keep order:true, stats:pseudo", + "└─TableReader(Probe) root data:Selection", + " └─Selection cop[tikv] gt(test.employees.dept_id, 1)", + " └─TableFullScan cop[tikv] table:e keep order:true, stats:pseudo" + ] + }, + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary , ( select salary FROM employees e2 WHERE e2.id = e.id LIMIT 1 OFFSET 1 ) AS avg_dept_salary FROM employees e WHERE e.dept_id > 1", + "Plan": [ + "Projection root test.employees.name, test.employees.salary, test.employees.salary", + "└─Apply root CARTESIAN left outer join, left side:TableReader", + " ├─TableReader(Build) root data:Selection", + " │ └─Selection cop[tikv] gt(test.employees.dept_id, 1)", + " │ └─TableFullScan cop[tikv] table:e keep order:false, stats:pseudo", + " └─Limit(Probe) root offset:1, count:1", + " └─TableReader root data:Limit", + " └─Limit cop[tikv] offset:0, count:2", + " └─TableRangeScan cop[tikv] table:e2 range: decided by [eq(test.employees.id, test.employees.id)], keep order:false, stats:pseudo" + ] + }, + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, ( SELECT e2.salary FROM employees e2 WHERE e2.dept_id = e.dept_id LIMIT 1 OFFSET 0) AS avg_dept_salary FROM employees e WHERE e.dept_id = 1", + "Plan": [ + "Projection root test.employees.name, test.employees.salary, test.employees.salary", + "└─Apply root CARTESIAN left outer join, left side:TableReader", + " ├─TableReader(Build) root data:Selection", + " │ └─Selection cop[tikv] eq(test.employees.dept_id, 1)", + " │ └─TableFullScan cop[tikv] table:e keep order:false, stats:pseudo", + " └─Limit(Probe) root offset:0, count:1", + " └─TableReader root data:Limit", + " └─Limit cop[tikv] offset:0, count:1", + " └─Selection cop[tikv] eq(test.employees.dept_id, test.employees.dept_id)", + " └─TableFullScan cop[tikv] table:e2 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.id, e.name, e.salary, (SELECT en.note FROM employees e2 JOIN employee_notes en ON en.employee_id = e2.id WHERE e2.id = e.id ORDER BY en.created_at DESC LIMIT 1) AS latest_note FROM employees e", + "Plan": [ + "Apply root CARTESIAN left outer join, left side:TableReader", + "├─TableReader(Build) root data:TableFullScan", + "│ └─TableFullScan cop[tikv] table:e keep order:false, stats:pseudo", + "└─TopN(Probe) root test.employee_notes.created_at:desc, offset:0, count:1", + " └─IndexHashJoin root inner join, inner:IndexLookUp, outer key:test.employees.id, inner key:test.employee_notes.employee_id, equal cond:eq(test.employees.id, test.employee_notes.employee_id)", + " ├─TableReader(Build) root data:TableRangeScan", + " │ └─TableRangeScan cop[tikv] table:e2 range: decided by [eq(test.employees.id, test.employees.id)], keep order:false, stats:pseudo", + " └─IndexLookUp(Probe) root ", + " ├─Selection(Build) cop[tikv] eq(test.employee_notes.employee_id, test.employees.id), not(isnull(test.employee_notes.employee_id))", + " │ └─IndexRangeScan cop[tikv] table:en, index:idx_employee_id(employee_id) range: decided by [eq(test.employee_notes.employee_id, test.employees.id)], keep order:false, stats:pseudo", + " └─TableRowIDScan(Probe) cop[tikv] table:en keep order:false, stats:pseudo" + ] + }, + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary,(SELECT en.note FROM employees e2 JOIN employee_notes en ON en.employee_id = e2.id LEFT JOIN employees e3 ON e3.id = e2.dept_id WHERE e2.id = e.id LIMIT 1) AS note_multi_join FROM employees e", + "Plan": [ + "Projection root test.employees.name, test.employees.salary, test.employee_notes.note", + "└─Apply root CARTESIAN left outer join, left side:TableReader", + " ├─TableReader(Build) root data:TableFullScan", + " │ └─TableFullScan cop[tikv] table:e keep order:false, stats:pseudo", + " └─Limit(Probe) root offset:0, count:1", + " └─IndexJoin root inner join, inner:TableReader, outer key:test.employee_notes.employee_id, inner key:test.employees.id, equal cond:eq(test.employee_notes.employee_id, test.employees.id)", + " ├─IndexLookUp(Build) root ", + " │ ├─Selection(Build) cop[tikv] eq(test.employee_notes.employee_id, test.employees.id)", + " │ │ └─IndexFullScan cop[tikv] table:en, index:idx_employee_id(employee_id) keep order:false, stats:pseudo", + " │ └─TableRowIDScan(Probe) cop[tikv] table:en keep order:false, stats:pseudo", + " └─TableReader(Probe) root data:Selection", + " └─Selection cop[tikv] eq(test.employees.id, test.employees.id)", + " └─TableRangeScan cop[tikv] table:e2 range: decided by [test.employee_notes.employee_id], keep order:false, stats:pseudo" + ] + }, + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary,(SELECT e2.salary FROM employees e2 INNER JOIN employee_notes en ON en.employee_id = e2.id WHERE e2.id = e.id LIMIT 1) AS salary_inner_join FROM employees e", + "Plan": [ + "Projection root test.employees.name, test.employees.salary, test.employees.salary", + "└─Apply root CARTESIAN left outer join, left side:TableReader", + " ├─TableReader(Build) root data:TableFullScan", + " │ └─TableFullScan cop[tikv] table:e keep order:false, stats:pseudo", + " └─Limit(Probe) root offset:0, count:1", + " └─IndexJoin root inner join, inner:TableReader, outer key:test.employee_notes.employee_id, inner key:test.employees.id, equal cond:eq(test.employee_notes.employee_id, test.employees.id)", + " ├─IndexReader(Build) root index:Selection", + " │ └─Selection cop[tikv] eq(test.employee_notes.employee_id, test.employees.id)", + " │ └─IndexFullScan cop[tikv] table:en, index:idx_employee_id(employee_id) keep order:false, stats:pseudo", + " └─TableReader(Probe) root data:Selection", + " └─Selection cop[tikv] eq(test.employees.id, test.employees.id)", + " └─TableRangeScan cop[tikv] table:e2 range: decided by [test.employee_notes.employee_id], keep order:false, stats:pseudo" + ] + }, + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, (SELECT AVG(e2.salary) FROM employees e2 WHERE e2.id = e.id GROUP BY e2.dept_id HAVING AVG(e2.salary) > 1000 LIMIT 1) AS avg_salary_having FROM employees e", + "Plan": [ + "MergeJoin root left outer join, left side:TableReader, left key:test.employees.id, right key:test.employees.id", + "├─Projection(Build) root cast(test.employees.salary, decimal(14,6) BINARY)->Column#16, test.employees.id", + "│ └─TableReader root data:Selection", + "│ └─Selection cop[tikv] gt(cast(test.employees.salary, decimal(14,6) BINARY), 1000)", + "│ └─TableFullScan cop[tikv] table:e2 keep order:true, stats:pseudo", + "└─TableReader(Probe) root data:TableFullScan", + " └─TableFullScan cop[tikv] table:e keep order:true, stats:pseudo" + ] + }, + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, (SELECT count(e2.dept_id) FROM employees e2 WHERE e2.id = e.id limit 1) AS distinct_dept_id FROM employees e", + "Plan": [ + "Projection root test.employees.name, test.employees.salary, if(isnull(test.employees.dept_id), 0, 1)->Column#16", + "└─MergeJoin root left outer join, left side:TableReader, left key:test.employees.id, right key:test.employees.id", + " ├─TableReader(Build) root data:TableFullScan", + " │ └─TableFullScan cop[tikv] table:e2 keep order:true, stats:pseudo", + " └─TableReader(Probe) root data:TableFullScan", + " └─TableFullScan cop[tikv] table:e keep order:true, stats:pseudo" + ] + }, + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary,(SELECT e2.salary FROM employees e2 WHERE e2.id = e.id AND e2.dept_id IN ( SELECT dept_id FROM employees e3 WHERE e3.id = e.id LIMIT 1 ) LIMIT 1) AS salary_nested FROM employees e", + "Plan": [ + "Projection root test.employees.name, test.employees.salary, test.employees.salary", + "└─Apply root CARTESIAN left outer join, left side:TableReader", + " ├─TableReader(Build) root data:TableFullScan", + " │ └─TableFullScan cop[tikv] table:e keep order:false, stats:pseudo", + " └─Limit(Probe) root offset:0, count:1", + " └─HashJoin root inner join, equal:[eq(test.employees.dept_id, test.employees.dept_id)]", + " ├─HashAgg(Build) root group by:test.employees.dept_id, funcs:firstrow(test.employees.dept_id)->test.employees.dept_id", + " │ └─Selection root not(isnull(test.employees.dept_id))", + " │ └─Limit root offset:0, count:1", + " │ └─TableReader root data:Limit", + " │ └─Limit cop[tikv] offset:0, count:1", + " │ └─TableRangeScan cop[tikv] table:e3 range: decided by [eq(test.employees.id, test.employees.id)], keep order:false, stats:pseudo", + " └─TableReader(Probe) root data:Selection", + " └─Selection cop[tikv] not(isnull(test.employees.dept_id))", + " └─TableRangeScan cop[tikv] table:e2 range: decided by [eq(test.employees.id, test.employees.id)], keep order:false, stats:pseudo" + ] + }, + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary,(SELECT e2.salary FROM employees e2 WHERE e2.id = e.id AND EXISTS ( SELECT 1 FROM employee_notes en WHERE en.employee_id = e2.id ) LIMIT 1) AS salary_exists FROM employees e", + "Plan": [ + "MergeJoin root left outer join, left side:TableReader, left key:test.employees.id, right key:test.employees.id", + "├─MergeJoin(Build) root semi join, left side:TableReader, left key:test.employees.id, right key:test.employee_notes.employee_id", + "│ ├─IndexReader(Build) root index:IndexFullScan", + "│ │ └─IndexFullScan cop[tikv] table:en, index:idx_employee_id(employee_id) keep order:true, stats:pseudo", + "│ └─TableReader(Probe) root data:TableFullScan", + "│ └─TableFullScan cop[tikv] table:e2 keep order:true, stats:pseudo", + "└─TableReader(Probe) root data:TableFullScan", + " └─TableFullScan cop[tikv] table:e keep order:true, stats:pseudo" + ] + }, + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, (SELECT e2.salary FROM employees e2 WHERE e2.id = e.id ORDER BY e2.dept_id, e2.salary DESC LIMIT 1) AS salary_order_multi FROM employees e", + "Plan": [ + "MergeJoin root left outer join, left side:TableReader, left key:test.employees.id, right key:test.employees.id", + "├─TableReader(Build) root data:TableFullScan", + "│ └─TableFullScan cop[tikv] table:e2 keep order:true, stats:pseudo", + "└─TableReader(Probe) root data:TableFullScan", + " └─TableFullScan cop[tikv] table:e keep order:true, stats:pseudo" + ] + }, + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, (SELECT DISTINCT e2.dept_id FROM employees e2 WHERE e2.id = e.id LIMIT 1) AS distinct_dept_id FROM employees e", + "Plan": [ + "Projection root test.employees.name, test.employees.salary, test.employees.dept_id", + "└─Apply root CARTESIAN left outer join, left side:TableReader", + " ├─TableReader(Build) root data:TableFullScan", + " │ └─TableFullScan cop[tikv] table:e keep order:false, stats:pseudo", + " └─HashAgg(Probe) root group by:test.employees.dept_id, funcs:firstrow(test.employees.dept_id)->test.employees.dept_id", + " └─TableReader root data:TableRangeScan", + " └─TableRangeScan cop[tikv] table:e2 range: decided by [eq(test.employees.id, test.employees.id)], keep order:false, stats:pseudo" + ] + }, + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, (SELECT ROW_NUMBER() OVER (ORDER BY e2.salary DESC) FROM employees e2 WHERE e2.id = e.id LIMIT 1) AS row_num FROM employees e", + "Plan": [ + "Projection root test.employees.name, test.employees.salary, Column#17", + "└─Apply root CARTESIAN left outer join, left side:TableReader", + " ├─TableReader(Build) root data:TableFullScan", + " │ └─TableFullScan cop[tikv] table:e keep order:false, stats:pseudo", + " └─Window(Probe) root row_number()->Column#17 over(order by test.employees.salary desc rows between current row and current row)", + " └─Sort root test.employees.salary:desc", + " └─TableReader root data:TableRangeScan", + " └─TableRangeScan cop[tikv] table:e2 range: decided by [eq(test.employees.id, test.employees.id)], keep order:false, stats:pseudo" + ] + }, + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, (SELECT DISTINCT e2.dept_id FROM employees e2 WHERE e2.id = e.id LIMIT 1) AS distinct_dept_id FROM employees e", + "Plan": [ + "Projection root test.employees.name, test.employees.salary, test.employees.dept_id", + "└─Apply root CARTESIAN left outer join, left side:TableReader", + " ├─TableReader(Build) root data:TableFullScan", + " │ └─TableFullScan cop[tikv] table:e keep order:false, stats:pseudo", + " └─HashAgg(Probe) root group by:test.employees.dept_id, funcs:firstrow(test.employees.dept_id)->test.employees.dept_id", + " └─TableReader root data:TableRangeScan", + " └─TableRangeScan cop[tikv] table:e2 range: decided by [eq(test.employees.id, test.employees.id)], keep order:false, stats:pseudo" + ] + } + ] + } +] diff --git a/pkg/planner/core/testdata/decorrelate_limit_suite_xut.json b/pkg/planner/core/testdata/decorrelate_limit_suite_xut.json new file mode 100644 index 0000000000000..71b7cd5ccb3af --- /dev/null +++ b/pkg/planner/core/testdata/decorrelate_limit_suite_xut.json @@ -0,0 +1,201 @@ +[ + { + "Name": "TestDecorrelateLimitOptimization", + "Cases": [ + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary , ( select salary FROM employees e2 WHERE e2.id = e.id LIMIT 1 OFFSET 0 ) AS avg_dept_salary FROM employees e WHERE e.dept_id > 1", + "Plan": [ + "MergeJoin root left outer join, left side:TableReader, left key:test.employees.id, right key:test.employees.id", + "├─TableReader(Build) root data:TableFullScan", + "│ └─TableFullScan cop[tikv] table:e2 keep order:true, stats:pseudo", + "└─TableReader(Probe) root data:Selection", + " └─Selection cop[tikv] gt(test.employees.dept_id, 1)", + " └─TableFullScan cop[tikv] table:e keep order:true, stats:pseudo" + ] + }, + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary , ( select salary FROM employees e2 WHERE e2.id = e.id LIMIT 1 OFFSET 1 ) AS avg_dept_salary FROM employees e WHERE e.dept_id > 1", + "Plan": [ + "Projection root test.employees.name, test.employees.salary, test.employees.salary", + "└─Apply root CARTESIAN left outer join, left side:TableReader", + " ├─TableReader(Build) root data:Selection", + " │ └─Selection cop[tikv] gt(test.employees.dept_id, 1)", + " │ └─TableFullScan cop[tikv] table:e keep order:false, stats:pseudo", + " └─Limit(Probe) root offset:1, count:1", + " └─TableReader root data:Limit", + " └─Limit cop[tikv] offset:0, count:2", + " └─TableRangeScan cop[tikv] table:e2 range: decided by [eq(test.employees.id, test.employees.id)], keep order:false, stats:pseudo" + ] + }, + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, ( SELECT e2.salary FROM employees e2 WHERE e2.dept_id = e.dept_id LIMIT 1 OFFSET 0) AS avg_dept_salary FROM employees e WHERE e.dept_id = 1", + "Plan": [ + "Projection root test.employees.name, test.employees.salary, test.employees.salary", + "└─Apply root CARTESIAN left outer join, left side:TableReader", + " ├─TableReader(Build) root data:Selection", + " │ └─Selection cop[tikv] eq(test.employees.dept_id, 1)", + " │ └─TableFullScan cop[tikv] table:e keep order:false, stats:pseudo", + " └─Limit(Probe) root offset:0, count:1", + " └─TableReader root data:Limit", + " └─Limit cop[tikv] offset:0, count:1", + " └─Selection cop[tikv] eq(test.employees.dept_id, test.employees.dept_id)", + " └─TableFullScan cop[tikv] table:e2 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.id, e.name, e.salary, (SELECT en.note FROM employees e2 JOIN employee_notes en ON en.employee_id = e2.id WHERE e2.id = e.id ORDER BY en.created_at DESC LIMIT 1) AS latest_note FROM employees e", + "Plan": [ + "Apply root CARTESIAN left outer join, left side:TableReader", + "├─TableReader(Build) root data:TableFullScan", + "│ └─TableFullScan cop[tikv] table:e keep order:false, stats:pseudo", + "└─TopN(Probe) root test.employee_notes.created_at:desc, offset:0, count:1", + " └─IndexHashJoin root inner join, inner:IndexLookUp, outer key:test.employees.id, inner key:test.employee_notes.employee_id, equal cond:eq(test.employees.id, test.employee_notes.employee_id)", + " ├─TableReader(Build) root data:TableRangeScan", + " │ └─TableRangeScan cop[tikv] table:e2 range: decided by [eq(test.employees.id, test.employees.id)], keep order:false, stats:pseudo", + " └─IndexLookUp(Probe) root ", + " ├─Selection(Build) cop[tikv] eq(test.employee_notes.employee_id, test.employees.id), not(isnull(test.employee_notes.employee_id))", + " │ └─IndexRangeScan cop[tikv] table:en, index:idx_employee_id(employee_id) range: decided by [eq(test.employee_notes.employee_id, test.employees.id)], keep order:false, stats:pseudo", + " └─TableRowIDScan(Probe) cop[tikv] table:en keep order:false, stats:pseudo" + ] + }, + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary,(SELECT en.note FROM employees e2 JOIN employee_notes en ON en.employee_id = e2.id LEFT JOIN employees e3 ON e3.id = e2.dept_id WHERE e2.id = e.id LIMIT 1) AS note_multi_join FROM employees e", + "Plan": [ + "Projection root test.employees.name, test.employees.salary, test.employee_notes.note", + "└─Apply root CARTESIAN left outer join, left side:TableReader", + " ├─TableReader(Build) root data:TableFullScan", + " │ └─TableFullScan cop[tikv] table:e keep order:false, stats:pseudo", + " └─Limit(Probe) root offset:0, count:1", + " └─IndexJoin root inner join, inner:TableReader, outer key:test.employee_notes.employee_id, inner key:test.employees.id, equal cond:eq(test.employee_notes.employee_id, test.employees.id)", + " ├─IndexLookUp(Build) root ", + " │ ├─Selection(Build) cop[tikv] eq(test.employee_notes.employee_id, test.employees.id)", + " │ │ └─IndexFullScan cop[tikv] table:en, index:idx_employee_id(employee_id) keep order:false, stats:pseudo", + " │ └─TableRowIDScan(Probe) cop[tikv] table:en keep order:false, stats:pseudo", + " └─TableReader(Probe) root data:Selection", + " └─Selection cop[tikv] eq(test.employees.id, test.employees.id)", + " └─TableRangeScan cop[tikv] table:e2 range: decided by [test.employee_notes.employee_id], keep order:false, stats:pseudo" + ] + }, + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary,(SELECT e2.salary FROM employees e2 INNER JOIN employee_notes en ON en.employee_id = e2.id WHERE e2.id = e.id LIMIT 1) AS salary_inner_join FROM employees e", + "Plan": [ + "Projection root test.employees.name, test.employees.salary, test.employees.salary", + "└─Apply root CARTESIAN left outer join, left side:TableReader", + " ├─TableReader(Build) root data:TableFullScan", + " │ └─TableFullScan cop[tikv] table:e keep order:false, stats:pseudo", + " └─Limit(Probe) root offset:0, count:1", + " └─IndexJoin root inner join, inner:TableReader, outer key:test.employee_notes.employee_id, inner key:test.employees.id, equal cond:eq(test.employee_notes.employee_id, test.employees.id)", + " ├─IndexReader(Build) root index:Selection", + " │ └─Selection cop[tikv] eq(test.employee_notes.employee_id, test.employees.id)", + " │ └─IndexFullScan cop[tikv] table:en, index:idx_employee_id(employee_id) keep order:false, stats:pseudo", + " └─TableReader(Probe) root data:Selection", + " └─Selection cop[tikv] eq(test.employees.id, test.employees.id)", + " └─TableRangeScan cop[tikv] table:e2 range: decided by [test.employee_notes.employee_id], keep order:false, stats:pseudo" + ] + }, + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, (SELECT AVG(e2.salary) FROM employees e2 WHERE e2.id = e.id GROUP BY e2.dept_id HAVING AVG(e2.salary) > 1000 LIMIT 1) AS avg_salary_having FROM employees e", + "Plan": [ + "MergeJoin root left outer join, left side:TableReader, left key:test.employees.id, right key:test.employees.id", + "├─Projection(Build) root cast(test.employees.salary, decimal(14,6) BINARY)->Column#16, test.employees.id", + "│ └─TableReader root data:Selection", + "│ └─Selection cop[tikv] gt(cast(test.employees.salary, decimal(14,6) BINARY), 1000)", + "│ └─TableFullScan cop[tikv] table:e2 keep order:true, stats:pseudo", + "└─TableReader(Probe) root data:TableFullScan", + " └─TableFullScan cop[tikv] table:e keep order:true, stats:pseudo" + ] + }, + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, (SELECT count(e2.dept_id) FROM employees e2 WHERE e2.id = e.id limit 1) AS distinct_dept_id FROM employees e", + "Plan": [ + "Projection root test.employees.name, test.employees.salary, if(isnull(test.employees.dept_id), 0, 1)->Column#16", + "└─MergeJoin root left outer join, left side:TableReader, left key:test.employees.id, right key:test.employees.id", + " ├─TableReader(Build) root data:TableFullScan", + " │ └─TableFullScan cop[tikv] table:e2 keep order:true, stats:pseudo", + " └─TableReader(Probe) root data:TableFullScan", + " └─TableFullScan cop[tikv] table:e keep order:true, stats:pseudo" + ] + }, + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary,(SELECT e2.salary FROM employees e2 WHERE e2.id = e.id AND e2.dept_id IN ( SELECT dept_id FROM employees e3 WHERE e3.id = e.id LIMIT 1 ) LIMIT 1) AS salary_nested FROM employees e", + "Plan": [ + "Projection root test.employees.name, test.employees.salary, test.employees.salary", + "└─Apply root CARTESIAN left outer join, left side:TableReader", + " ├─TableReader(Build) root data:TableFullScan", + " │ └─TableFullScan cop[tikv] table:e keep order:false, stats:pseudo", + " └─Limit(Probe) root offset:0, count:1", + " └─HashJoin root inner join, equal:[eq(test.employees.dept_id, test.employees.dept_id)]", + " ├─HashAgg(Build) root group by:test.employees.dept_id, funcs:firstrow(test.employees.dept_id)->test.employees.dept_id", + " │ └─Selection root not(isnull(test.employees.dept_id))", + " │ └─Limit root offset:0, count:1", + " │ └─TableReader root data:Limit", + " │ └─Limit cop[tikv] offset:0, count:1", + " │ └─TableRangeScan cop[tikv] table:e3 range: decided by [eq(test.employees.id, test.employees.id)], keep order:false, stats:pseudo", + " └─TableReader(Probe) root data:Selection", + " └─Selection cop[tikv] not(isnull(test.employees.dept_id))", + " └─TableRangeScan cop[tikv] table:e2 range: decided by [eq(test.employees.id, test.employees.id)], keep order:false, stats:pseudo" + ] + }, + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary,(SELECT e2.salary FROM employees e2 WHERE e2.id = e.id AND EXISTS ( SELECT 1 FROM employee_notes en WHERE en.employee_id = e2.id ) LIMIT 1) AS salary_exists FROM employees e", + "Plan": [ + "MergeJoin root left outer join, left side:TableReader, left key:test.employees.id, right key:test.employees.id", + "├─MergeJoin(Build) root semi join, left side:TableReader, left key:test.employees.id, right key:test.employee_notes.employee_id", + "│ ├─IndexReader(Build) root index:IndexFullScan", + "│ │ └─IndexFullScan cop[tikv] table:en, index:idx_employee_id(employee_id) keep order:true, stats:pseudo", + "│ └─TableReader(Probe) root data:TableFullScan", + "│ └─TableFullScan cop[tikv] table:e2 keep order:true, stats:pseudo", + "└─TableReader(Probe) root data:TableFullScan", + " └─TableFullScan cop[tikv] table:e keep order:true, stats:pseudo" + ] + }, + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, (SELECT e2.salary FROM employees e2 WHERE e2.id = e.id ORDER BY e2.dept_id, e2.salary DESC LIMIT 1) AS salary_order_multi FROM employees e", + "Plan": [ + "MergeJoin root left outer join, left side:TableReader, left key:test.employees.id, right key:test.employees.id", + "├─TableReader(Build) root data:TableFullScan", + "│ └─TableFullScan cop[tikv] table:e2 keep order:true, stats:pseudo", + "└─TableReader(Probe) root data:TableFullScan", + " └─TableFullScan cop[tikv] table:e keep order:true, stats:pseudo" + ] + }, + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, (SELECT DISTINCT e2.dept_id FROM employees e2 WHERE e2.id = e.id LIMIT 1) AS distinct_dept_id FROM employees e", + "Plan": [ + "Projection root test.employees.name, test.employees.salary, test.employees.dept_id", + "└─Apply root CARTESIAN left outer join, left side:TableReader", + " ├─TableReader(Build) root data:TableFullScan", + " │ └─TableFullScan cop[tikv] table:e keep order:false, stats:pseudo", + " └─HashAgg(Probe) root group by:test.employees.dept_id, funcs:firstrow(test.employees.dept_id)->test.employees.dept_id", + " └─TableReader root data:TableRangeScan", + " └─TableRangeScan cop[tikv] table:e2 range: decided by [eq(test.employees.id, test.employees.id)], keep order:false, stats:pseudo" + ] + }, + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, (SELECT ROW_NUMBER() OVER (ORDER BY e2.salary DESC) FROM employees e2 WHERE e2.id = e.id LIMIT 1) AS row_num FROM employees e", + "Plan": [ + "Projection root test.employees.name, test.employees.salary, Column#17", + "└─Apply root CARTESIAN left outer join, left side:TableReader", + " ├─TableReader(Build) root data:TableFullScan", + " │ └─TableFullScan cop[tikv] table:e keep order:false, stats:pseudo", + " └─Window(Probe) root row_number()->Column#17 over(order by test.employees.salary desc rows between current row and current row)", + " └─Sort root test.employees.salary:desc", + " └─TableReader root data:TableRangeScan", + " └─TableRangeScan cop[tikv] table:e2 range: decided by [eq(test.employees.id, test.employees.id)], keep order:false, stats:pseudo" + ] + }, + { + "SQL": "EXPLAIN format = 'plan_tree' SELECT e.name, e.salary, (SELECT DISTINCT e2.dept_id FROM employees e2 WHERE e2.id = e.id LIMIT 1) AS distinct_dept_id FROM employees e", + "Plan": [ + "Projection root test.employees.name, test.employees.salary, test.employees.dept_id", + "└─Apply root CARTESIAN left outer join, left side:TableReader", + " ├─TableReader(Build) root data:TableFullScan", + " │ └─TableFullScan cop[tikv] table:e keep order:false, stats:pseudo", + " └─HashAgg(Probe) root group by:test.employees.dept_id, funcs:firstrow(test.employees.dept_id)->test.employees.dept_id", + " └─TableReader root data:TableRangeScan", + " └─TableRangeScan cop[tikv] table:e2 range: decided by [eq(test.employees.id, test.employees.id)], keep order:false, stats:pseudo" + ] + } + ] + } +] diff --git a/pkg/planner/core/testdata/plan_suite_unexported_out.json b/pkg/planner/core/testdata/plan_suite_unexported_out.json index 47e4ca6c981f4..c43708c1e8c15 100644 --- a/pkg/planner/core/testdata/plan_suite_unexported_out.json +++ b/pkg/planner/core/testdata/plan_suite_unexported_out.json @@ -135,7 +135,7 @@ "Join{Join{DataScan(t)->DataScan(x)->Aggr(firstrow(test.t.a))}(test.t.a,test.t.a)->Projection->DataScan(x)->Aggr(firstrow(test.t.a))}(test.t.a,test.t.a)->Projection->Projection", "Join{Join{DataScan(t)->DataScan(x)}(test.t.a,test.t.a)->DataScan(x)->Aggr(firstrow(test.t.a))}(test.t.a,test.t.a)->Projection->Projection", "Join{Join{DataScan(t)->DataScan(x)->Aggr(firstrow(test.t.a))}(test.t.a,test.t.a)->Projection->DataScan(x)->Aggr(firstrow(test.t.a))}(test.t.a,test.t.a)->Projection->Projection", - "Apply{DataScan(t1)->DataScan(t2)->Sel([eq(test.t.a, test.t.a)])->Projection->Sort->Limit}->Projection->Sel([eq(test.t.b, test.t.b)])->Projection", + "Join{DataScan(t1)->DataScan(t2)}(test.t.a,test.t.a)->Projection->Sel([eq(test.t.b, test.t.b)])->Projection", "Apply{DataScan(t2)->DataScan(t1)->Sel([eq(test.t.a, test.t.a)])->Projection}->Projection", "DataScan(t2)->Aggr(count(1))->Projection" ] diff --git a/tests/integrationtest/r/planner/cascades/integration.result b/tests/integrationtest/r/planner/cascades/integration.result index 21d0b060be6a6..a9671c732e230 100644 --- a/tests/integrationtest/r/planner/cascades/integration.result +++ b/tests/integrationtest/r/planner/cascades/integration.result @@ -853,16 +853,14 @@ NULL explain format="plan_tree" select sum(a), (select t1.a from t1 where t1.a = t2.a limit 1), (select t1.b from t1 where t1.b = t2.b limit 1) from t2; id task access object operator info Projection root Column#7, planner__cascades__integration.t1.a, planner__cascades__integration.t1.b -└─Apply root CARTESIAN left outer join, left side:Apply - ├─Apply(Build) root CARTESIAN left outer join, left side:StreamAgg +└─Apply root CARTESIAN left outer join, left side:IndexJoin + ├─IndexJoin(Build) root left outer join, inner:TableReader, left side:StreamAgg, outer key:planner__cascades__integration.t2.a, inner key:planner__cascades__integration.t1.a, equal cond:eq(planner__cascades__integration.t2.a, planner__cascades__integration.t1.a) │ ├─StreamAgg(Build) root funcs:sum(Column#12)->Column#7, funcs:firstrow(Column#13)->planner__cascades__integration.t2.a, funcs:firstrow(Column#14)->planner__cascades__integration.t2.b │ │ └─Projection root cast(planner__cascades__integration.t2.a, decimal(10,0) BINARY)->Column#12, planner__cascades__integration.t2.a->Column#13, planner__cascades__integration.t2.b->Column#14 │ │ └─TableReader root data:TableFullScan │ │ └─TableFullScan cop[tikv] table:t2 keep order:false, stats:pseudo - │ └─Limit(Probe) root offset:0, count:1 - │ └─TableReader root data:Limit - │ └─Limit cop[tikv] offset:0, count:1 - │ └─TableRangeScan cop[tikv] table:t1 range: decided by [eq(planner__cascades__integration.t1.a, planner__cascades__integration.t2.a)], keep order:false, stats:pseudo + │ └─TableReader(Probe) root data:TableRangeScan + │ └─TableRangeScan cop[tikv] table:t1 range: decided by [planner__cascades__integration.t2.a], keep order:false, stats:pseudo └─Limit(Probe) root offset:0, count:1 └─TableReader root data:Limit └─Limit cop[tikv] offset:0, count:1