Skip to content

Commit 96736c4

Browse files
authored
tp: Add Experimental AddColumn join (#3494)
Adds a new experimental operation, ExperimentalAddColumns, to the structured query system. It enriches a core query with additional columns from an input query using LEFT JOIN semantics. Key features: (1) it takes a core query, all of whose columns are returned, plus an input query that is the source of the additional columns; (2) it joins the two using either equality conditions or a freeform SQL expression. Example use case: adding the process name to slice data by joining on the process ID.
1 parent 6c2171f commit 96736c4

File tree

4 files changed

+489
-0
lines changed

4 files changed

+489
-0
lines changed

protos/perfetto/perfetto_sql/structured_query.proto

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,54 @@ message PerfettoSqlStructuredQuery {
251251
optional bool use_union_all = 2;
252252
}
253253

254+
// DON'T USE. EXPERIMENTAL.
255+
// Adds columns from an input query to a core query using a join condition.
256+
//
257+
// This operation returns all columns from the core query plus specified
258+
// columns from the input query, joined using a condition similar to
259+
// ExperimentalJoin.
260+
//
261+
// Examples:
262+
// 1) Add process name to slices:
263+
// * core_query contains slice data (id, ts, dur, name)
264+
// * input_query contains process data (id, name, pid)
265+
// * input_columns = ["name"] (this clashes with the slice "name" column,
//   so the process name should be given a distinct alias)
266+
// * equality_columns pairs the core_query column holding the process id
//   (left_column) with the id column of input_query (right_column)
267+
// 2) Enrich counter data with additional metadata:
268+
// * core_query contains counter values
269+
// * input_query contains metadata table
270+
// * input_columns specifies which metadata columns to add
271+
//
272+
// Schema:
273+
// 1) The input_columns list must contain at least one column name.
274+
// 2) All column names in input_columns must exist in the input_query result.
275+
// 3) One of equality_columns or freeform_condition must be specified.
276+
message ExperimentalAddColumns {
277+
// The core query. All columns from this query will be returned. Required.
// The SQL generator combines the two queries with a LEFT JOIN, so every
// row of this query is kept even when no input_query row matches.
278+
optional PerfettoSqlStructuredQuery core_query = 1;
279+
280+
// The input query. Source of additional columns. Required.
281+
optional PerfettoSqlStructuredQuery input_query = 2;
282+
283+
// List of column names to add from the input query. At least one required.
// Empty names are rejected. Each entry is emitted verbatim by the SQL
// generator as `input.<name>`, after `core.*`.
284+
repeated string input_columns = 3;
285+
286+
// The condition for the join. One of the following is required.
287+
oneof condition {
288+
// Columns from both queries which must be equal for the join.
289+
// Reuses the EqualityColumns message from ExperimentalJoin, where
290+
// left_column refers to core_query and right_column refers to
291+
// input_query.
// Both left_column and right_column must be set; the generated condition
// is `core.<left_column> = input.<right_column>`.
292+
ExperimentalJoin.EqualityColumns equality_columns = 4;
293+
294+
// A freeform SQL expression representing the join condition.
295+
// Reuses the FreeformCondition message from ExperimentalJoin, where
296+
// left_query_alias refers to core_query and right_query_alias refers
297+
// to input_query.
// The SQL generator requires left_query_alias to be exactly "core" and
// right_query_alias to be exactly "input", and uses sql_expression
// unchanged as the ON clause, so the expression must refer to the two
// queries by those aliases.
298+
ExperimentalJoin.FreeformCondition freeform_condition = 5;
299+
}
300+
}
301+
254302
// An opaque id field for the query. The convention is to use underscores
255303
// and lower case (foo_bar) but this is not enforced. Optional in the general
256304
// case but strongly recommended for good error messages. Required in cases
@@ -300,8 +348,25 @@ message PerfettoSqlStructuredQuery {
300348

301349
// DON'T USE. EXPERIMENTAL.
302350
ExperimentalUnion experimental_union = 101;
351+
352+
// DON'T USE. EXPERIMENTAL.
353+
ExperimentalAddColumns experimental_add_columns = 102;
303354
}
304355

356+
// Evaluation order of operations:
357+
// The fields below are evaluated in a specific order following SQL semantics.
358+
// The logical evaluation order is:
359+
// 1. FROM (source) - determine the source data
360+
// 2. WHERE (filters) - filter rows from the source
361+
// 3. GROUP BY (group_by) - group rows and compute aggregations
362+
// 4. SELECT (select_columns) - select and transform columns
363+
// 5. ORDER BY (order_by) - sort the result set
364+
// 6. LIMIT/OFFSET (limit, offset) - restrict the number of rows returned
365+
//
366+
// This follows the standard SQL evaluation order as defined in the SQL
367+
// standard (ISO/IEC 9075). For more details, see:
368+
// https://www.sqlite.org/lang_select.html
369+
305370
// Represents a single filter on a column.
306371
message Filter {
307372
// The column name to be filtered. Required.
611 Bytes
Binary file not shown.

src/trace_processor/perfetto_sql/generator/structured_query_generator.cc

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,9 @@ class GeneratorImpl {
175175
base::StatusOr<std::string> Union(
176176
const StructuredQuery::ExperimentalUnion::Decoder&);
177177

178+
base::StatusOr<std::string> AddColumns(
179+
const StructuredQuery::ExperimentalAddColumns::Decoder&);
180+
178181
// Filtering.
179182
static base::StatusOr<std::string> Filters(RepeatedProto filters);
180183

@@ -263,6 +266,11 @@ base::StatusOr<std::string> GeneratorImpl::GenerateImpl() {
263266
StructuredQuery::ExperimentalUnion::Decoder union_decoder(
264267
q.experimental_union());
265268
ASSIGN_OR_RETURN(source, Union(union_decoder));
269+
270+
} else if (q.has_experimental_add_columns()) {
271+
StructuredQuery::ExperimentalAddColumns::Decoder add_columns_decoder(
272+
q.experimental_add_columns());
273+
ASSIGN_OR_RETURN(source, AddColumns(add_columns_decoder));
266274
} else if (q.has_sql()) {
267275
StructuredQuery::Sql::Decoder sql_source(q.sql());
268276
ASSIGN_OR_RETURN(source, SqlSource(sql_source));
@@ -575,6 +583,103 @@ base::StatusOr<std::string> GeneratorImpl::Union(
575583
return sql;
576584
}
577585

586+
// Generates the SQL source for an ExperimentalAddColumns operation.
//
// The result is a parenthesised subquery of the form
//   (SELECT core.*, input.<col>, ... FROM <core> AS core
//    LEFT JOIN <input> AS input ON <condition>)
// so every row and column of the core query is preserved and the requested
// input columns are appended to it.
//
// Returns an error status when:
//  * core_query or input_query is missing;
//  * neither equality_columns nor freeform_condition is set;
//  * input_columns is empty or contains an empty name;
//  * equality_columns lacks its left or right column;
//  * freeform_condition lacks an alias or the SQL expression, or its aliases
//    are not exactly "core" (left) and "input" (right).
//
// Column names and the freeform expression are spliced into the SQL verbatim:
// they are neither quoted nor de-duplicated against the columns of `core.*`.
base::StatusOr<std::string> GeneratorImpl::AddColumns(
    const StructuredQuery::ExperimentalAddColumns::Decoder& add_columns) {
  // Validate required fields.
  if (!add_columns.has_core_query()) {
    return base::ErrStatus("AddColumns must specify a core query");
  }
  if (!add_columns.has_input_query()) {
    return base::ErrStatus("AddColumns must specify an input query");
  }
  if (!add_columns.has_equality_columns() &&
      !add_columns.has_freeform_condition()) {
    return base::ErrStatus(
        "AddColumns must specify either equality_columns or "
        "freeform_condition");
  }

  // A freshly obtained repeated-field iterator is falsy when there is nothing
  // to visit, so this single test covers the "no input columns" case; no
  // separate counting pass is needed.
  if (!add_columns.input_columns()) {
    return base::ErrStatus("AddColumns must specify at least one input column");
  }

  // Generate nested sources.
  const std::string core_table = NestedSource(add_columns.core_query());
  const std::string input_table = NestedSource(add_columns.input_query());

  // Build the SELECT clause with all core columns plus the input columns.
  std::string select_clause = "core.*";
  for (auto it = add_columns.input_columns(); it; ++it) {
    const protozero::ConstChars col_name(*it);
    if (col_name.size == 0) {
      return base::ErrStatus("Input column name cannot be empty");
    }
    select_clause += ", input." + col_name.ToStdString();
  }

  // Build the join condition.
  std::string condition;
  if (add_columns.has_equality_columns()) {
    StructuredQuery::ExperimentalJoin::EqualityColumns::Decoder eq_cols(
        add_columns.equality_columns());
    if (!eq_cols.has_left_column()) {
      return base::ErrStatus("EqualityColumns must specify a left column");
    }
    if (!eq_cols.has_right_column()) {
      return base::ErrStatus("EqualityColumns must specify a right column");
    }
    condition = "core." + eq_cols.left_column().ToStdString() + " = input." +
                eq_cols.right_column().ToStdString();
  } else {
    StructuredQuery::ExperimentalJoin::FreeformCondition::Decoder free_cond(
        add_columns.freeform_condition());
    if (!free_cond.has_left_query_alias()) {
      return base::ErrStatus(
          "FreeformCondition must specify a left query alias");
    }
    if (!free_cond.has_right_query_alias()) {
      return base::ErrStatus(
          "FreeformCondition must specify a right query alias");
    }
    if (!free_cond.has_sql_expression()) {
      return base::ErrStatus("FreeformCondition must specify a sql expression");
    }

    // The generated SQL always names the two sides "core" and "input", so a
    // freeform expression written against any other alias could not resolve.
    const std::string left_alias = free_cond.left_query_alias().ToStdString();
    if (left_alias != "core") {
      return base::ErrStatus(
          "FreeformCondition left_query_alias must be 'core', got '%s'",
          left_alias.c_str());
    }
    const std::string right_alias = free_cond.right_query_alias().ToStdString();
    if (right_alias != "input") {
      return base::ErrStatus(
          "FreeformCondition right_query_alias must be 'input', got '%s'",
          right_alias.c_str());
    }

    condition = free_cond.sql_expression().ToStdString();
  }

  // Generate the final SQL using LEFT JOIN to keep all core rows.
  std::string sql = "(SELECT " + select_clause + " FROM " + core_table +
                    " AS core LEFT JOIN " + input_table + " AS input ON " +
                    condition + ")";

  return sql;
}
682+
578683
base::StatusOr<std::string> GeneratorImpl::ReferencedSharedQuery(
579684
protozero::ConstChars raw_id) {
580685
std::string id = raw_id.ToStdString();

0 commit comments

Comments
 (0)