# Log-search alert on the node count of the first agent pool of the AKS cluster.
#
# The query counts nodes reported in KubeNodeInventory for that pool and returns
# rows only when the pool has scaled to at least `_minthreshold` percent of its
# configured autoscaler maximum (`max_count`). The alert is raised according to
# the `trigger` block below, evaluated against the query result.
#
# NOTE(review): the description says "system node pool"; this relies on
# agent_pool_profile.0 being the system pool - confirm for clusters with
# multiple pools.
# NOTE(review): `max_count` is expected to be set (autoscaling enabled on the
# pool); a null value cannot be rendered into the query template.
resource "azurerm_monitor_scheduled_query_rules_alert" "aks_system_agent_pool_count_status" {
  # Created only when alerting is switched on for this module.
  count = var.alerts.enable_alerts ? 1 : 0

  name                = "aks_system_agent_pool_count_status"
  location            = var.aks_cluster_location
  resource_group_name = var.aks_resource_group_name

  action {
    action_group = [data.azurerm_monitor_action_group.platformDev.0.id]
  }

  # The query is run against the AKS cluster resource itself.
  data_source_id = data.azurerm_kubernetes_cluster.cluster.id
  description    = "Alert when system node pool is reached max threshold value"
  enabled        = var.alerts.apps_workload.enabled

  # `nodepoolMaxnodeCount` must be rendered as a bare numeric literal. Wrapping
  # the interpolation in quotes makes it a KQL string, and the percentage
  # arithmetic (long * 100 / string) is rejected by the service with
  # "SEM0001: Arithmetic expression cannot be carried-out between I64 and
  # StringBuffer". The pool name below stays quoted because it is compared as
  # a string.
  query = <<-QUERY
    let nodepoolMaxnodeCount = ${data.azurerm_kubernetes_cluster.cluster.agent_pool_profile.0.max_count};
    let _minthreshold = 70;
    KubeNodeInventory
    | extend nodepoolType = todynamic(Labels) //Parse the labels to get the list of node pool types
    | extend nodepoolName = todynamic(nodepoolType[0].agentpool)
    | where nodepoolName contains "${data.azurerm_kubernetes_cluster.cluster.agent_pool_profile.0.name}"
    | extend nodepoolName = tostring(nodepoolName)
    | summarize nodeCount = count(Computer) by ClusterName, tostring(nodepoolName), TimeGenerated
    | extend scaledpercent = iff(((nodeCount * 100 / nodepoolMaxnodeCount) >= _minthreshold), "warn", "normal")
    | where scaledpercent == 'warn'
    | summarize arg_max(TimeGenerated, *) by nodeCount, ClusterName, tostring(nodepoolName)
    | project ClusterName,
    TotalNodeCount= strcat("Total Node Count: ", nodeCount),
    ScaledOutPercentage = (nodeCount * 100 / nodepoolMaxnodeCount),
    TimeGenerated,
    nodepoolName, scaledpercent
  QUERY

  severity    = var.alerts.apps_workload.cluster_agent_pool.severity
  frequency   = var.alerts.apps_workload.cluster_agent_pool.frequency
  time_window = var.alerts.apps_workload.cluster_agent_pool.time_window

  # Fires when the query result exceeds the configured threshold.
  trigger {
    operator  = "GreaterThan"
    threshold = var.alerts.apps_workload.cluster_agent_pool.threshold
  }
}
error:
Error: Error creating or updating Scheduled Query Rule "aks_system_agent_pool_count_status" (resource group "RG-SIT-APPS-01"): insights.ScheduledQueryRulesClient#CreateOrUpdate: Failure responding to request: StatusCode=400 -- Original Error: autorest/azure: Service returned an error. Status=400 Code="BadRequest" Message="{\r\n \"error\": {\r\n \"message\": \"The request had some invalid properties\",\r\n \"code\": \"BadArgumentError\",\r\n \"correlationId\": \"f132c345-0d4b-43a4-a6ec-77a02e60beaf\",\r\n \"innererror\": {\r\n \"code\": \"SemanticError\",\r\n \"message\": \"A semantic error occurred.\",\r\n \"innererror\": {\r\n \"code\": \"SEM0001\",\r\n \"message\": \"Arithmetic expression cannot be carried-out between I64 and StringBuffer\"\r\n }\r\n }\r\n }\r\n}"
11:20:25 │
11:20:25 │ with module.aks_base_config.azurerm_monitor_scheduled_query_rules_alert.aks_system_agent_pool_count_status[0],
11:20:25 │ on .terraform/modules/aks_base_config/alerts.tf line 290, in resource "azurerm_monitor_scheduled_query_rules_alert" "aks_system_agent_pool_count_status":
11:20:25 │ 290: resource "azurerm_monitor_scheduled_query_rules_alert" "aks_system_agent_pool_count_status" {