Skip to content

Commit 80332c3

Browse files
authored
[Azure OpenAI] Add alerting rule templates (#15412)
* Add alerting rule templates
1 parent e3ebaab commit 80332c3

File tree

5 files changed

+92
-3
lines changed

packages/azure_openai/changelog.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1 1
# newer versions go on top
2+
- version: "1.10.0"
3+
changes:
4+
- description: Add Alerting Rule Templates.
5+
type: enhancement
6+
link: https://github.com/elastic/integrations/pull/15412
2 7
- version: "1.9.0"
3 8
changes:
4 9
- description: Add a flag `fips_compatible` to control whether the package is allowed in the ECH FedRAMP High environment.
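(File path not captured in this extract; the new file defines the alerting rule template with id `azure_openai-latency-spike`.)
Lines changed: 28 additions & 0 deletions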
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
{
2+
"id": "azure_openai-latency-spike",
3+
"type": "alerting_rule_template",
4+
"attributes": {
5+
"name": "[Azure OpenAI] Latency high",
6+
"tags": ["Azure OpenAI"],
7+
"ruleTypeId": ".es-query",
8+
"schedule": {
9+
"interval": "1m"
10+
},
11+
"params": {
12+
"searchType": "esqlQuery",
13+
"timeWindowSize": 10,
14+
"timeWindowUnit": "m",
15+
"esqlQuery": {
16+
"esql": "// Alert triggers when the response latency exceeds the recommended threshold value {5000ms} within the look back time window.\n// The alert is grouped by Model Deployment Name.\n// You can adjust the threshold value by modifying the time_to_response in the WHERE clause, which is specified in milliseconds.\nFROM metrics-azure.open_ai-default\n| KEEP azure.open_ai.time_to_response.avg, azure.dimensions.model_deployment_name, @timestamp\n| WHERE azure.dimensions.model_deployment_name IS NOT NULL\n| STATS time_to_response = MAX(azure.open_ai.time_to_response.avg) BY azure.dimensions.model_deployment_name\n| WHERE time_to_response > 5000\n| EVAL time_to_response = ROUND(time_to_response, 2)\n| SORT time_to_response DESC"
17+
},
18+
"groupBy": "row",
19+
"timeField": "@timestamp"
20+
},
21+
"alertDelay": {
22+
"active": 2
23+
}
24+
},
25+
"managed": true,
26+
"coreMigrationVersion": "8.8.0",
27+
"typeMigrationVersion": "10.1.0"
28+
}
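(File path not captured in this extract; the new file defines the alerting rule template with id `azure_openai-provisioned-utilization`.)
Lines changed: 28 additions & 0 deletions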
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
{
2+
"id": "azure_openai-provisioned-utilization",
3+
"type": "alerting_rule_template",
4+
"attributes": {
5+
"name": "[Azure OpenAI] Provisioned Utilization above threshold",
6+
"tags": ["Azure OpenAI"],
7+
"ruleTypeId": ".es-query",
8+
"schedule": {
9+
"interval": "1m"
10+
},
11+
"params": {
12+
"searchType": "esqlQuery",
13+
"timeWindowSize": 10,
14+
"timeWindowUnit": "m",
15+
"esqlQuery": {
16+
"esql": "// Alert triggers when the provisioned utilization exceeds the recommended threshold value {85%} within the look back time window.\n// The alert is grouped by Model Deployment Name.\n// You can adjust the threshold value by modifying the provisioned_utilization in the WHERE clause, which is specified in percent.\nFROM metrics-azure.open_ai-default\n| KEEP azure.open_ai.provisioned_managed_utilization_v2.avg, azure.dimensions.model_deployment_name, @timestamp\n| WHERE azure.dimensions.model_deployment_name IS NOT NULL\n| STATS provisioned_utilization = MAX(azure.open_ai.provisioned_managed_utilization_v2.avg) * 100 BY azure.dimensions.model_deployment_name\n| WHERE provisioned_utilization > 85\n| EVAL provisioned_utilization = ROUND(provisioned_utilization, 2)\n| SORT provisioned_utilization DESC"
17+
},
18+
"groupBy": "row",
19+
"timeField": "@timestamp"
20+
},
21+
"alertDelay": {
22+
"active": 2
23+
}
24+
},
25+
"managed": true,
26+
"coreMigrationVersion": "8.8.0",
27+
"typeMigrationVersion": "10.1.0"
28+
}
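(File path not captured in this extract; the new file defines the alerting rule template with id `azure_openai-quota-error-rates`.)
Lines changed: 28 additions & 0 deletions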
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
{
2+
"id": "azure_openai-quota-error-rates",
3+
"type": "alerting_rule_template",
4+
"attributes": {
5+
"name": "[Azure OpenAI] Quota Error Rates above threshold",
6+
"tags": ["Azure OpenAI"],
7+
"ruleTypeId": ".es-query",
8+
"schedule": {
9+
"interval": "1m"
10+
},
11+
"params": {
12+
"searchType": "esqlQuery",
13+
"timeWindowSize": 10,
14+
"timeWindowUnit": "m",
15+
"esqlQuery": {
16+
"esql": "// Alert triggers when the quota_error count is greater than recommended threshold value {5} within the look back time window.\n// The alert is grouped by Model Deployment Name.\n// You can adjust the threshold value by modifying the quota_error count in the WHERE clause.\nFROM logs-azure_openai.logs-default\n| KEEP http.response.status_code, azure.dimensions.model_deployment_name, @timestamp\n| WHERE azure.dimensions.model_deployment_name IS NOT NULL\n| WHERE http.response.status_code == 429\n| STATS quota_error = COUNT(*) BY azure.dimensions.model_deployment_name\n| WHERE quota_error > 5\n| SORT quota_error DESC"
17+
},
18+
"groupBy": "row",
19+
"timeField": "@timestamp"
20+
},
21+
"alertDelay": {
22+
"active": 2
23+
}
24+
},
25+
"managed": true,
26+
"coreMigrationVersion": "8.8.0",
27+
"typeMigrationVersion": "10.1.0"
28+
}

packages/azure_openai/manifest.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
format_version: 3.1.3
1+
format_version: 3.4.0
2 2
name: azure_openai
3 3
title: "Azure OpenAI"
4-
version: "1.9.0"
4+
version: "1.10.0"
5 5
source:
6 6
license: "Elastic-2.0"
7 7
description: "Collects Azure OpenAI Logs and Metrics"
@@ -14,7 +14,7 @@ categories:
14 14
- security
15 15
conditions:
16 16
kibana:
17-
version: "^8.17.1 || ^9.0.0"
17+
version: "^8.19.0 || ^9.2.1"
18 18
elastic:
19 19
subscription: "basic"
20 20
vars:

0 commit comments

Comments
 (0)