• Azure AKS集群监控告警表达式配置


    背景需求

    在 Azure AKS 集群中,需要对部署的服务进行监控和告警,为此要创建并启用预警规则;而要监控每个 Pod(容器)级别的 CPU 和内存使用率,则需要自己编写日志搜索查询。
    在这里插入图片描述

    解决方法

    搜索查询语句如下,使用前需要替换其中的部分信息:clusterID 可在 AKS 集群的“概述”页面右侧点击“JSON 视图”获取,其中 id 字段的值就是所需的 clusterID,用它替换下面查询中 clusterID 变量的内容。查询最后的 percentage 阈值(示例中为 80)也需要根据实际情况调整。

    // Memory: lists containers whose 95th-percentile RSS memory usage is above
    // 80% of their memory limit. Replace clusterID with the AKS cluster's
    // resource id before use. No TimeGenerated filter is applied in this query,
    // so the evaluated window is whatever the caller (e.g. the alert rule) supplies.
    let metricUsageCounterName = 'memoryRssBytes';
    let metricLimitCounterName = 'memoryLimitBytes';
    let clusterID = "/subscriptions/xxxxxxxxxxx";
    // Container perf samples whose InstanceName starts with the cluster id.
    let CachedFilteredPerfTable = Perf
    | where ObjectName == 'K8SContainer'
    | where InstanceName startswith clusterID
    | project Node = Computer, TimeGenerated, CounterName, CounterValue, InstanceName;
    // Latest limit sample per (Node, InstanceName).
    let LimitsTable = CachedFilteredPerfTable
    | where CounterName =~ metricLimitCounterName
    | summarize arg_max(TimeGenerated, *) by Node, InstanceName
    | project Node, InstanceName, LimitsValue = CounterValue, TimeGenerated;
    // Latest inventory record per container, enriched with its limit.
    // InstanceName is rebuilt as "<ClusterId>/<ContainerInstance>" so it can be
    // joined against the InstanceName column of the perf rows.
    let MetaDataTable = KubePodInventory
    | where isnotempty(ClusterName) | where isnotempty(Namespace) | where isnotempty(Computer)
    | where ClusterId =~ clusterID
    | project TimeGenerated, ClusterId, Namespace, ControllerName, Node = Computer, Pod = Name, ContainerInstance = ContainerName, ContainerID, ControllerKind
    | summarize arg_max(TimeGenerated, *) by Node, ContainerInstance
    | project Namespace, ControllerName, Node, Pod, ContainerInstance, InstanceName = strcat(ClusterId, '/', ContainerInstance), ContainerID, ControllerKind,
    ContainerName = tostring(split(ContainerInstance, '/')[1]), LastPodInventoryTimeGenerated = TimeGenerated, ClusterId
    | join kind=leftouter (LimitsTable) on Node, InstanceName
    | project Namespace, ControllerName, Node, Pod, InstanceName, ContainerID, LimitsValue, ControllerKind, ContainerName, ContainerInstance, LastPodInventoryTimeGenerated, ClusterId;
    // 95th percentile of the usage counter per (Node, InstanceName).
    let AggregationTable = CachedFilteredPerfTable
    | where CounterName =~ metricUsageCounterName
    | project TimeGenerated, Node, InstanceName, CounterValue
    | summarize Aggregation = percentile(CounterValue, 95) by Node, InstanceName
    | project Node, InstanceName, Aggregation;
    MetaDataTable
    | join kind=leftouter (AggregationTable) on Node, InstanceName
    // Rows without a positive limit cannot yield a meaningful ratio; drop them
    // before dividing (the left outer join leaves LimitsValue null when no
    // limit sample matched).
    | where LimitsValue > 0
    | order by ContainerName asc
    | extend ContainerIdentity = strcat(ContainerName, '|', Pod)
    | extend percentage = Aggregation / LimitsValue * 100
    | project ContainerIdentity, percentage, Aggregation, LimitsValue, Node, ControllerName, ControllerKind, ContainerID, ContainerInstance, InstanceName, Namespace, LastPodInventoryTimeGenerated, ClusterId
    // Alert threshold in percent; adjust to the environment.
    | where percentage > 80
    
    
    
    
    
    
    // CPU: lists containers whose 95th-percentile CPU usage is above 80% of
    // their CPU limit. Replace clusterID with the AKS cluster's resource id
    // before use. No TimeGenerated filter is applied in this query, so the
    // evaluated window is whatever the caller (e.g. the alert rule) supplies.
    let metricUsageCounterName = 'cpuUsageNanoCores';
    let metricLimitCounterName = 'cpuLimitNanoCores';
    let clusterID = "/subscriptions/xxxxxxxxxxx";
    // Container perf samples whose InstanceName starts with the cluster id.
    let CachedFilteredPerfTable = Perf
    | where ObjectName == 'K8SContainer'
    | where InstanceName startswith clusterID
    | project Node = Computer, TimeGenerated, CounterName, CounterValue, InstanceName;
    // Latest limit sample per (Node, InstanceName); dividing the nanocore
    // counter by 1000000 expresses it in millicores.
    let LimitsTable = CachedFilteredPerfTable
    | where CounterName =~ metricLimitCounterName
    | summarize arg_max(TimeGenerated, *) by Node, InstanceName
    | project Node, InstanceName, LimitsValue = CounterValue/1000000, TimeGenerated;
    // Latest inventory record per container, enriched with its limit.
    // InstanceName is rebuilt as "<ClusterId>/<ContainerInstance>" so it can be
    // joined against the InstanceName column of the perf rows.
    let MetaDataTable = KubePodInventory
    | where isnotempty(ClusterName) | where isnotempty(Namespace) | where isnotempty(Computer)
    | where ClusterId =~ clusterID
    | project TimeGenerated, ClusterId, Namespace, ControllerName, Node = Computer, Pod = Name, ContainerInstance = ContainerName, ContainerID, ControllerKind
    | summarize arg_max(TimeGenerated, *) by Node, ContainerInstance
    | project Namespace, ControllerName, Node, Pod, ContainerInstance, InstanceName = strcat(ClusterId, '/', ContainerInstance), ContainerID, ControllerKind,
    ContainerName = tostring(split(ContainerInstance, '/')[1]), LastPodInventoryTimeGenerated = TimeGenerated, ClusterId
    | join kind=leftouter (LimitsTable) on Node, InstanceName
    | project Namespace, ControllerName, Node, Pod, InstanceName, ContainerID, LimitsValue, ControllerKind, ContainerName, ContainerInstance, LastPodInventoryTimeGenerated, ClusterId;
    // 95th percentile of the usage counter per (Node, InstanceName), scaled by
    // the same 1000000 factor as the limit so the two values are comparable.
    let AggregationTable = CachedFilteredPerfTable
    | where CounterName =~ metricUsageCounterName
    | project TimeGenerated, Node, InstanceName, CounterValue = CounterValue/1000000
    | summarize Aggregation = percentile(CounterValue, 95) by Node, InstanceName
    | project Node, InstanceName, Aggregation;
    MetaDataTable
    | join kind=leftouter (AggregationTable) on Node, InstanceName
    // Rows without a positive limit cannot yield a meaningful ratio; drop them
    // before dividing (the left outer join leaves LimitsValue null when no
    // limit sample matched).
    | where LimitsValue > 0
    | order by ContainerName asc
    | extend ContainerIdentity = strcat(ContainerName, '|', Pod)
    | extend percentage = Aggregation / LimitsValue * 100
    | project ContainerIdentity, percentage, Aggregation, LimitsValue, Node, ControllerName, ControllerKind, ContainerID, ContainerInstance, InstanceName, Namespace, LastPodInventoryTimeGenerated, ClusterId
    // Alert threshold in percent; adjust to the environment.
    | where percentage > 80
    
    
  • 相关阅读:
    Qt的网络连接方式
    边缘计算是如何为元宇宙提供动力的?
    ouster-32激光雷达使用---雷达参数配置
    基因组学复习题
    stable diffusion本地部署教程
    一招教会你配置Jenkins钉钉消息通知
    一图读懂TWT
    vue2.6 和 2.7对可选链的不同支持导致构建失败
    word文档损坏不可避免的一些情况
    javascript之for循环介绍
  • 原文地址:https://blog.csdn.net/weixin_44388689/article/details/138116736