Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions udfs/community/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1641,6 +1641,21 @@ SELECT
'tino', 'julie', 'jordan'
```

### [insight_counts(STRUCT<avgPreviousExecutionMs INT64, stage_performance_standalone_insights ARRAY<STRUCT<stage_id INT64, slot_contention BOOL, insufficient_shuffle_quota BOOL, bi_engine_reasons ARRAY<STRUCT<code STRING, message STRING>>>>, stage_performance_change_insights ARRAY<STRUCT<stage_id INT64, input_data_change STRUCT<records_read_diff_percentage FLOAT64>>>>)](insight_counts.sqlx)
Input: performanceInsights: `query_info.performance_insights` in the JOBS Information Schema [view](https://cloud.google.com/bigquery/docs/information-schema-jobs-by-organization#schema)

Output: An array of structs, one per performance insight that occurred, each holding the insight name and the number of job stages that reported it.

```sql
SELECT
  bqutil.fn.insight_counts(query_info.performance_insights)
FROM
  `region-us`.INFORMATION_SCHEMA.JOBS

[
  STRUCT('Slot contention', 1),
  STRUCT('Shuffle quota issue', 1),
  STRUCT('Input data change', 1)
]
```

### [sure_cond(value STRING, cond BOOL)](sure_cond.sqlx)

If `cond` is `FALSE`, the function raises an error.
Expand Down
61 changes: 61 additions & 0 deletions udfs/community/insight_counts.sqlx
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
config { hasOutput: true }
/*
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

-- insight_counts:
-- Helper function that counts, per performance insight, how many stages of a
-- BigQuery job reported that insight.
-- Input:
-- performanceInsights: query_info.performance_insights in the JOBS Information Schema view
-- https://cloud.google.com/bigquery/docs/information-schema-jobs-by-organization#schema
-- Output: ARRAY<STRUCT<insight STRING, count INT64>> with one element per insight
-- that was reported by at least one stage. Elements are always emitted in the
-- order 'Slot contention', 'Shuffle quota issue', 'Input data change'.
-- An input with no matching stages (or NULL / empty stage arrays) yields [].
CREATE OR REPLACE FUNCTION ${self()}(performanceInsights STRUCT<avgPreviousExecutionMs INT64, stage_performance_standalone_insights ARRAY<STRUCT<stage_id INT64, slot_contention BOOL, insufficient_shuffle_quota BOOL, bi_engine_reasons ARRAY<STRUCT<code STRING, message STRING>>>>, stage_performance_change_insights ARRAY<STRUCT<stage_id INT64, input_data_change STRUCT<records_read_diff_percentage FLOAT64>>>>)
OPTIONS (
description="""Helper function to retrieve query insights for a BigQuery job as a list of structs.
Input:
performanceInsights: query_info.performance_insights in the JOBS Information Schema view
https://cloud.google.com/bigquery/docs/information-schema-jobs-by-organization#schema
Output: An array of structs representing each performance insight and their count throughout the stages of a job.
"""
)
AS (
  ARRAY(
    SELECT AS STRUCT
      tally.insight,
      tally.stage_count AS count
    FROM
      UNNEST([
        -- Each insight is tallied on its own. COUNTIF only counts TRUE, so a
        -- NULL flag is simply not counted and cannot mask the other flag on the
        -- same stage (a combined `a AND NOT b` test evaluates to NULL when b is
        -- NULL and would drop the stage entirely).
        STRUCT(
          'Slot contention' AS insight,
          (
            SELECT COUNTIF(stage.slot_contention)
            FROM UNNEST(performanceInsights.stage_performance_standalone_insights) AS stage
          ) AS stage_count,
          1 AS display_order
        ),
        STRUCT(
          'Shuffle quota issue' AS insight,
          (
            SELECT COUNTIF(stage.insufficient_shuffle_quota)
            FROM UNNEST(performanceInsights.stage_performance_standalone_insights) AS stage
          ) AS stage_count,
          2 AS display_order
        ),
        -- A change insight counts only when it carries a records-read delta.
        STRUCT(
          'Input data change' AS insight,
          (
            SELECT COUNTIF(change.input_data_change.records_read_diff_percentage IS NOT NULL)
            FROM UNNEST(performanceInsights.stage_performance_change_insights) AS change
          ) AS stage_count,
          3 AS display_order
        )
      ]) AS tally
    WHERE
      -- Insights that never occurred are omitted rather than reported as 0.
      tally.stage_count > 0
    ORDER BY
      -- Explicit ordering keeps the returned array deterministic.
      tally.display_order
  )
);
46 changes: 46 additions & 0 deletions udfs/community/test_cases.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,52 @@ generate_udf_test("int", [
expected_output: `CAST(7 AS INT64)`
},
]);
// Test cases for the insight_counts UDF with non-empty stage insights.
// Each case passes one STRUCT literal (as a SQL string) and the expected
// array of (insight, count) structs.
generate_udf_test("insight_counts", [

// Case 1: stage 1 reports slot contention only, stage 2 reports a shuffle
// quota issue only, stage 3 reports both, and one change insight carries a
// records_read_diff_percentage. Expected counts: 2, 2 and 1.
// NOTE(review): the expected array lists the insights in a fixed order; if the
// harness compares arrays element-wise this case is order-sensitive - TODO confirm.
{
inputs: [`STRUCT(
1000 AS avgPreviousExecutionMs,
ARRAY<STRUCT<stage_id INT64, slot_contention BOOL, insufficient_shuffle_quota BOOL, bi_engine_reasons ARRAY<STRUCT<code STRING, message STRING>>>>[
(1, true, false, [STRUCT('code1', 'message1')]),
(2, false, true, [STRUCT('code2', 'message2')]),
(3, true, true, NULL)
] AS stage_performance_standalone_insights,
ARRAY<STRUCT<stage_id INT64, input_data_change STRUCT<records_read_diff_percentage FLOAT64>>>[
(3, STRUCT(10.0))
] AS stage_performance_change_insights)`
],
expected_output: `[
STRUCT('Slot contention' AS insight, 2 AS count),
STRUCT('Shuffle quota issue' AS insight, 2 AS count),
STRUCT('Input data change' AS insight, 1 AS count)
]`
},
// Case 2: two stages report slot contention only and there are no change
// insights, so the result is a single 'Slot contention' entry with count 2.
{
inputs: [`STRUCT(
1000 AS avgPreviousExecutionMs,
ARRAY<STRUCT<stage_id INT64, slot_contention BOOL, insufficient_shuffle_quota BOOL, bi_engine_reasons ARRAY<STRUCT<code STRING, message STRING>>>>[
(1, true, false, NULL),
(2, true, false, NULL)
] AS stage_performance_standalone_insights,
ARRAY<STRUCT<stage_id INT64, input_data_change STRUCT<records_read_diff_percentage FLOAT64>>>[] AS stage_performance_change_insights)`
],
expected_output: `[STRUCT('Slot contention' AS insight, 2 AS count)]`
},
]);

// Empty-input test for the insight_counts UDF: both stage insight arrays are
// empty and avgPreviousExecutionMs is NULL, so the expected result is an
// empty array.
generate_udf_test("insight_counts", [

{
inputs: [`STRUCT(
NULL AS avgPreviousExecutionMs,
ARRAY<STRUCT<stage_id INT64, slot_contention BOOL, insufficient_shuffle_quota BOOL, bi_engine_reasons ARRAY<STRUCT<code STRING, message STRING>>>>[] AS stage_performance_standalone_insights,
ARRAY<STRUCT<stage_id INT64, input_data_change STRUCT<records_read_diff_percentage FLOAT64>>>[] AS stage_performance_change_insights)`
],
expected_output: `[]`
},
]);


generate_udf_test("json_extract_keys", [
{
inputs: [`'{"foo" : "cat", "bar": "dog", "hat": "rat"}'`],
Expand Down