[Azure OpenAI] Add alerting rule templates (#15412)

muthu-mps · web-flow · commit 80332c309839 · 2025-11-24T18:25:46.000+05:30
* Add alerting rule templates
diff --git a/packages/azure_openai/changelog.yml b/packages/azure_openai/changelog.yml
@@ -1,4 +1,9 @@
 # newer versions go on top
+- version: "1.10.0"
+  changes:
+    - description: Add Alerting Rule Templates.
+      type: enhancement
+      link: https://github.com/elastic/integrations/pull/15412
 - version: "1.9.0"
   changes:
     - description: Add a flag `fips_compatible` to control whether the package is allowed in the ECH FedRAMP High environment.
diff --git a/packages/azure_openai/kibana/alerting_rule_template/azure_openai-latency-spike.json b/packages/azure_openai/kibana/alerting_rule_template/azure_openai-latency-spike.json
@@ -0,0 +1,28 @@
+{
+  "id": "azure_openai-latency-spike",
+  "type": "alerting_rule_template",
+  "attributes": {
+    "name": "[Azure OpenAI] Latency high",
+    "tags": ["Azure OpenAI"],
+    "ruleTypeId": ".es-query",
+    "schedule": {
+      "interval": "1m"
+    },
+    "params": {
+      "searchType": "esqlQuery",
+      "timeWindowSize": 10,
+      "timeWindowUnit": "m",
+      "esqlQuery": {
+        "esql": "// Alert triggers when the response latency exceeds the recommended threshold value {5000ms} within the look back time window.\n// The alert is grouped by Model Deployment Name.\n// You can adjust the threshold value by modifying the time_to_response in the WHERE clause, which is specified in milliseconds.\nFROM metrics-azure.open_ai-default\n| KEEP azure.open_ai.time_to_response.avg, azure.dimensions.model_deployment_name, @timestamp\n| WHERE azure.dimensions.model_deployment_name IS NOT NULL\n| STATS time_to_response = MAX(azure.open_ai.time_to_response.avg) BY azure.dimensions.model_deployment_name\n| WHERE time_to_response > 5000\n| EVAL time_to_response = ROUND(time_to_response, 2)\n| SORT time_to_response DESC"
+      },
+      "groupBy": "row",
+      "timeField": "@timestamp"
+    },
+    "alertDelay": {
+      "active": 2
+    }
+  },
+  "managed": true,
+  "coreMigrationVersion": "8.8.0",
+  "typeMigrationVersion": "10.1.0"
+}
diff --git a/packages/azure_openai/kibana/alerting_rule_template/azure_openai-provisioned-utilization.json b/packages/azure_openai/kibana/alerting_rule_template/azure_openai-provisioned-utilization.json
@@ -0,0 +1,28 @@
+{
+  "id": "azure_openai-provisioned-utilization",
+  "type": "alerting_rule_template",
+  "attributes": {
+    "name": "[Azure OpenAI] Provisioned Utilization above threshold",
+    "tags": ["Azure OpenAI"],
+    "ruleTypeId": ".es-query",
+    "schedule": {
+      "interval": "1m"
+    },
+    "params": {
+      "searchType": "esqlQuery",
+      "timeWindowSize": 10,
+      "timeWindowUnit": "m",
+      "esqlQuery": {
+        "esql": "// Alert triggers when the provisioned utilization exceeds the recommended threshold value {85%} within the look back time window.\n// The alert is grouped by Model Deployment Name.\n// You can adjust the threshold value by modifying the provisioned_utilization in the WHERE clause, which is specified in percent.\nFROM metrics-azure.open_ai-default\n| KEEP azure.open_ai.provisioned_managed_utilization_v2.avg, azure.dimensions.model_deployment_name, @timestamp\n| WHERE azure.dimensions.model_deployment_name IS NOT NULL\n| STATS provisioned_utilization = MAX(azure.open_ai.provisioned_managed_utilization_v2.avg) * 100 BY azure.dimensions.model_deployment_name\n| WHERE provisioned_utilization > 85\n| EVAL provisioned_utilization = ROUND(provisioned_utilization, 2)\n| SORT provisioned_utilization DESC"
+      },
+      "groupBy": "row",
+      "timeField": "@timestamp"
+    },
+    "alertDelay": {
+      "active": 2
+    }
+  },
+  "managed": true,
+  "coreMigrationVersion": "8.8.0",
+  "typeMigrationVersion": "10.1.0"
+}
diff --git a/packages/azure_openai/kibana/alerting_rule_template/azure_openai-quota-error-rates.json b/packages/azure_openai/kibana/alerting_rule_template/azure_openai-quota-error-rates.json
@@ -0,0 +1,28 @@
+{
+  "id": "azure_openai-quota-error-rates",
+  "type": "alerting_rule_template",
+  "attributes": {
+    "name": "[Azure OpenAI] Quota Error Rates above threshold",
+    "tags": ["Azure OpenAI"],
+    "ruleTypeId": ".es-query",
+    "schedule": {
+      "interval": "1m"
+    },
+    "params": {
+      "searchType": "esqlQuery",
+      "timeWindowSize": 10,
+      "timeWindowUnit": "m",
+      "esqlQuery": {
+        "esql": "// Alert triggers when the quota_error count exceeds the recommended threshold value {5} within the look back time window.\n// The alert is grouped by Model Deployment Name.\n// You can adjust the threshold value by modifying the quota_error count in the WHERE clause.\nFROM logs-azure_openai.logs-default\n| KEEP http.response.status_code, azure.dimensions.model_deployment_name, @timestamp\n| WHERE azure.dimensions.model_deployment_name IS NOT NULL\n| WHERE http.response.status_code == 429\n| STATS quota_error = COUNT(*) BY azure.dimensions.model_deployment_name\n| WHERE quota_error > 5\n| SORT quota_error DESC"
+      },
+      "groupBy": "row",
+      "timeField": "@timestamp"
+    },
+    "alertDelay": {
+      "active": 2
+    }
+  },
+  "managed": true,
+  "coreMigrationVersion": "8.8.0",
+  "typeMigrationVersion": "10.1.0"
+}
diff --git a/packages/azure_openai/manifest.yml b/packages/azure_openai/manifest.yml
@@ -1,7 +1,7 @@
-format_version: 3.1.3
+format_version: 3.4.0
 name: azure_openai
 title: "Azure OpenAI"
-version: "1.9.0"
+version: "1.10.0"
 source:
   license: "Elastic-2.0"
 description: "Collects Azure OpenAI Logs and Metrics"
@@ -14,7 +14,7 @@ categories:
   - security
 conditions:
   kibana:
-    version: "^8.17.1 || ^9.0.0"
+    version: "^8.19.0 || ^9.2.1"
   elastic:
     subscription: "basic"
 vars: