diff --git a/build/chart/ack-node-problem-detector/templates/node-problem-detector-config.yaml b/build/chart/ack-node-problem-detector/templates/node-problem-detector-config.yaml
index 65e2baa0..b3946f5c 100644
--- a/build/chart/ack-node-problem-detector/templates/node-problem-detector-config.yaml
+++ b/build/chart/ack-node-problem-detector/templates/node-problem-detector-config.yaml
@@ -13,37 +13,87 @@ data:
                 "reason": "KernelHasNoDeadlock",
                 "message": "kernel has no deadlock"
             },
+            {
+                "type": "Kernel.KernelBug",
+                "reason": "KernelHasNoBug",
+                "message": "kernel has no bug"
+            },
             {
                 "type": "ReadonlyFilesystem",
                 "reason": "FilesystemIsReadOnly",
                 "message": "Filesystem is read-only"
+            },
+            {
+                "type": "Kernel.CPUTemperatureHigh",
+                "reason": "CPUTemperatureNormal",
+                "message": "CPU temperature normal"
+            },
+            {
+                "type": "Kernel.HardwareErr",
+                "reason": "HardwareHasNoError",
+                "message": "Hardware has no error"
+            },
+            {
+                "type": "Kernel.CPUSoftLockErr",
+                "reason": "CPUHasNotSoftLockError",
+                "message": "CPU has not soft lockup"
+            },
+            {
+                "type": "Kernel.CPUHardLockErr",
+                "reason": "CPUHasNotHardLockError",
+                "message": "CPU has not hard lockup"
+            },
+            {
+                "type": "Kernel.OOMKilling",
+                "reason": "NothingOom",
+                "message": "nothing oom"
             }
         ],
         "rules": [
             {
-                "type": "temporary",
-                "reason": "PodOOMKilling",
-                "pattern": "Task in /kubepods.slice/(.+) killed as a result of limit of .*"
+                "type": "permanent",
+                "condition": "Kernel.KernelBug",
+                "reason": "KernelBug",
+                "pattern": "kernel BUG at.*"
             },
             {
-                "type": "temporary",
-                "reason": "TaskHung",
-                "pattern": "task \\S+:\\w+ blocked for more than \\w+ seconds\\."
+                "type": "permanent",
+                "condition": "Kernel.KernelBug",
+                "reason": "KernelBug",
+                "pattern": "Kernel panic - not syncing.*"
             },
             {
-                "type": "temporary",
-                "reason": "UnregisterNetDevice",
-                "pattern": "unregister_netdevice: waiting for \\w+ to become free. Usage count = \\d+"
+                "type": "permanent",
+                "condition": "Kernel.KernelBug",
+                "reason": "KernelBug",
+                "pattern": "BUG: unable to handle kernel NULL pointer dereference at.*"
+            },
+            {
+                "type": "permanent",
+                "condition": "Kernel.KernelBug",
+                "reason": "KernelBug",
+                "pattern": "general protection fault:.*"
+            },
+            {
+                "type": "permanent",
+                "condition": "Kernel.KernelBug",
+                "reason": "KernelBug",
+                "pattern": "divide error: 0000 \\[#\\d+\\] SMP"
+            },
+            {
+                "type": "permanent", "condition": "Kernel.OOMKilling",
+                "reason": "OOMKilling",
+                "pattern": "Kill process \\d+ (.+) score \\d+ or sacrifice child"
             },
             {
                 "type": "temporary",
-                "reason": "KernelOops",
-                "pattern": "BUG: unable to handle kernel NULL pointer dereference at .*"
+                "reason": "TaskHung",
+                "pattern": "task \\S+:\\w+ blocked for more than \\w+ seconds\\."
             },
             {
                 "type": "temporary",
-                "reason": "KernelOops",
-                "pattern": "divide error: 0000 \\[#\\d+\\] SMP"
+                "reason": "UnregisterNetDevice",
+                "pattern": "unregister_netdevice: waiting for \\w+ to become free. Usage count = \\d+"
             },
             {
                 "type": "permanent",
@@ -62,6 +112,36 @@ data:
                 "condition": "ReadonlyFilesystem",
                 "reason": "FilesystemIsReadOnly",
                 "pattern": "Remounting filesystem read-only"
+            },
+            {
+                "type": "permanent",
+                "condition": "Kernel.CPUTemperatureHigh",
+                "reason": "CPUTemperatureHigh",
+                "pattern": "temperature above threshold.*"
+            },
+            {
+                "type": "permanent",
+                "condition": "Kernel.HardwareErr",
+                "reason": "NvmeError",
+                "pattern": "nvme.* Timeout I/O"
+            },
+            {
+                "type": "permanent",
+                "condition": "Kernel.HardwareErr",
+                "reason": "NvmeError",
+                "pattern": "nvme.* timeout.*"
+            },
+            {
+                "type": "permanent",
+                "condition": "Kernel.CPUSoftLockErr",
+                "reason": "CPUSoftLockup",
+                "pattern": "BUG: soft lockup.*"
+            },
+            {
+                "type": "permanent",
+                "condition": "Kernel.CPUHardLockErr",
+                "reason": "CPUHardLockup",
+                "pattern": "NMI watchdog: Watchdog detected hard LOCKUP.*"
             }
         ]
     }
@@ -149,160 +229,136 @@ data:
     echo "NTP service is running"
     exit $OK
 
-  instance_expired_checker.json: |
+  fd-problem-monitor.json: |
     {
       "plugin": "custom",
       "pluginConfig": {
-        "invoke_interval": "600s",
+        "invoke_interval": "120s",
         "timeout": "30s",
         "max_output_length": 80,
-        "concurrency": 3,
-        "enable_message_change_based_condition_update": false
+        "concurrency": 3
       },
-      "source": "instance_termination_custom_checker",
+      "source": "fd-custom-plugin-monitor",
       "conditions": [
         {
-          "type": "InstanceExpired",
-          "reason": "InstanceNotToBeTerminated",
-          "message": "instance is not going to be terminated"
+          "type": "FDPressure",
+          "reason": "NodeHasNoFDPressure",
+          "message": "node has no fd pressure"
         }
       ],
       "rules": [
-        {
-          "type": "temporary",
-          "reason": "InstanceToBeTerminated",
-          "path": "./config/plugin/instance_expired_checker.sh",
-          "timeout": "30s"
-        },
         {
           "type": "permanent",
-          "condition": "InstanceExpired",
-          "reason": "InstanceToBeTerminated",
-          "path": "./config/plugin/instance_expired_checker.sh",
+          "condition": "FDPressure",
+          "reason": "NodeHasFDPressure",
+          "message": "too many fds have been used",
+          "path": "/config/plugin/check_fd.sh",
           "timeout": "30s"
         }
       ]
     }
-  instance_expired_checker.sh: |
+  check_fd.sh: |
     #!/bin/bash
+    # check max fd open files
     OK=0
     NONOK=1
     UNKNOWN=2
 
-    check_url='http://100.100.100.200/latest/meta-data/instance/spot/termination-time'
-    for ((i=1; i<=5; i ++))
-    do
-      resp=$(curl --max-time 5 -s $check_url)
-      if [ $? != 0 ]; then
-        sleep 1
-      else
-        echo $resp
-        date --date $resp +"%s"
-        if [ $? != 0 ]; then
-          exit $OK
-        else
-          echo "instance is going to be terminated at $resp"
-          exit $NONOK
-        fi
-      fi
-    done
-    echo "curl $check_url exe fail after try 5 times"
+    cd /host/proc
+
+    count=$(find -maxdepth 1 -type d -name '[0-9]*' | xargs -I {} ls {}/fd | wc -l)
+    max=$(cat /host/proc/sys/fs/file-max)
+
+    if [[ $count -gt $((max*80/100)) ]]; then
+    echo "current fd usage is $count and max is $max"
+    exit $NONOK
+    fi
+    echo "node has no fd pressure"
     exit $OK
 
-  ram-role-problem-monitor.json: |
+
+  docker-status-monitor.json: |
     {
       "plugin": "custom",
       "pluginConfig": {
-        "timeout": "60s",
-        "invoke_interval": "600s",
+        "timeout": "30s",
+        "invoke_interval": "120s",
         "concurrency": 3
       },
-      "source": "ram-role-monitor",
+      "source": "docker-custom-plugin-monitor",
       "conditions": [
         {
-          "type": "RAMRoleError",
-          "reason": "NodeHasRAMRole",
-          "message": "node has ram role"
+          "type": "dockerStatus",
+          "reason": "dockerOpen",
+          "message": "node docker service open"
         }
       ],
       "rules": [
         {
           "type": "permanent",
-          "condition": "RAMRoleError",
-          "reason": "NodeHasNoRAMRole",
-          "message": "node has no ram role",
-          "path": "/config/plugin/check_ram-role.sh",
-          "timeout": "60s"
+          "condition": "dockerStatus",
+          "reason": "dockerClose",
+          "message": "node close docker",
+          "path": "/config/plugin/check_docker.sh",
+          "timeout": "30s"
         }
       ]
     }
-  check_ram-role.sh: |
+  check_docker.sh: |
     #!/bin/bash
-    # check node has ram-role
     OK=0
     NONOK=1
     UNKNOWN=2
-    for ((i=1; i<=5; i ++))
-    do
-      ram_role=$(curl --max-time 5 http://100.100.100.200/latest/meta-data/ram/security-credentials/ )
-      resp=$(curl --max-time 5 http://100.100.100.200/latest/meta-data/ram/security-credentials/$ram_role)
-      found=$(echo $resp | grep "Success")
-      if [[ "$found" != "" ]]; then
-        echo "node has ram role"
-        exit $OK
-      fi
-      sleep 5
-    done
-    echo "node has no ram role"
+    # Healthy only while `systemctl status docker` prints "active (running)".
+    if systemctl status docker | grep -q "active (running)"; then
+    echo "node open docker service"
+    exit $OK
+    fi
+    echo "node close docker service"
+    systemctl start docker
     exit $NONOK
 
 
-  fd-problem-monitor.json: |
+  kubelet-status-monitor.json: |
     {
       "plugin": "custom",
       "pluginConfig": {
-        "invoke_interval": "120s",
         "timeout": "30s",
-        "max_output_length": 80,
+        "invoke_interval": "120s",
         "concurrency": 3
       },
-      "source": "fd-custom-plugin-monitor",
+      "source": "kubelet-custom-plugin-monitor",
       "conditions": [
         {
-          "type": "FDPressure",
-          "reason": "NodeHasNoFDPressure",
-          "message": "node has no fd pressure"
+          "type": "kubeletStatus",
+          "reason": "kubeletOpen",
+          "message": "node kubelet service open"
         }
       ],
       "rules": [
         {
           "type": "permanent",
-          "condition": "FDPressure",
-          "reason": "NodeHasFDPressure",
-          "message": "too many fds have been used",
-          "path": "/config/plugin/check_fd.sh",
+          "condition": "kubeletStatus",
+          "reason": "kubeletClose",
+          "message": "node close kubelet",
+          "path": "/config/plugin/check_kubelet.sh",
           "timeout": "30s"
         }
       ]
     }
-  check_fd.sh: |
+  check_kubelet.sh: |
     #!/bin/bash
-    # check max fd open files
     OK=0
     NONOK=1
     UNKNOWN=2
-
-    cd /host/proc
-
-    count=$(find -maxdepth 1 -type d -name '[0-9]*' | xargs -I {} ls {}/fd | wc -l)
-    max=$(cat /host/proc/sys/fs/file-max)
-
-    if [[ $count -gt $((max*80/100)) ]]; then
-    echo "current fd usage is $count and max is $max"
-    exit $NONOK
-    fi
-    echo "node has no fd pressure"
+    # Healthy only while `systemctl status kubelet` prints "active (running)".
+    if systemctl status kubelet | grep -q "active (running)"; then
+    echo "node open kubelet service"
     exit $OK
+    fi
+    echo "node close kubelet service"
+    systemctl start kubelet
+    exit $NONOK
 
 
   irqbalance-monitor.json: |
@@ -578,54 +634,6 @@ data:
     }
 
 
-  check_csi_hang.sh: |
-    #!/bin/sh
-
-    OK=0
-    NONOK=1
-
-
-    for pid in `ps -ef |grep plugin.csi.alibabacloud | awk '{print $2}'`
-    do
-        checkD=$(cat /host/proc/$pid/status |grep "State.*D")
-        checkP=$(cat /host/proc/$pid/status |grep "Name.*plugin.csi")
-        if [ "$checkP" != "" ] && [ "$checkD" != "" ]; then
-            echo "process diskplugin.csi is in State D"
-            exit $NONOK
-        fi
-    done
-
-    echo "procss diskplugin.csi State ok"
-    exit $OK
-  csi-hang-problem-monitor.json: |
-    {
-      "plugin": "custom",
-      "pluginConfig": {
-        "invoke_interval": "600s",
-        "timeout": "120s",
-        "max_output_length": 80,
-        "concurrency": 3,
-        "enable_message_change_based_condition_update": false
-      },
-      "source": "csi-hang-custom-plugin-monitor",
-      "conditions": [
-        {
-          "type": "CSIProcessWorks",
-          "reason": "CSIProcessWorks",
-          "message": "csi process works"
-        }
-      ],
-      "rules": [
-        {
-          "type": "temporary",
-          "reason": "CSIProcessIsHung",
-          "path": "./config/plugin/check_csi_hang.sh",
-          "timeout": "60s"
-        }
-      ]
-    }
-
-
   check_inodes.sh: |
     #!/bin/bash
     # check inode utilization on block device of mounting point /
@@ -823,7 +831,6 @@ data:
       },
       "source": "system-custom-plugin-monitor.json",
       "conditions": [
-
         {
           "type": "Node.IOPressureOK",
           "reason": "CPULoadOK",
@@ -833,6 +840,11 @@ data:
           "type": "Node.IOHang",
           "reason": "IOHangOK",
           "message": "IO hang is not happening"
+        },
+        {
+          "type": "Node.DiskUnmount",
+          "reason": "DiskUnmountOK",
+          "message": "Disk mountpoints are ok"
         }
       ],
       "rules": [
@@ -849,6 +861,13 @@ data:
           "reason": "IOHang",
           "path": "/config/plugin/check_io_hang.sh",
           "timeout": "60s"
+        },
+        {
+          "type": "permanent",
+          "condition": "Node.DiskUnmount",
+          "reason": "DiskUnmount",
+          "path": "/config/plugin/check_disk_unmount.sh",
+          "timeout": "5s"
         }
       ]
     }
@@ -1097,7 +1116,7 @@ data:
           "type": "Container.LogPermission",
           "reason": "LogDirectoryPermissionIsOK",
           "message": "/var/log directory permission is 755"
-        }
+        }
       ],
       "rules": [
         {
@@ -1120,7 +1139,7 @@ data:
           "reason": "LogDirectoryPermissionUnhealthy",
           "path": "/config/plugin/check_log_directory_permission.sh",
           "timeout": "60s"
-        }
+        }
       ]
     }
   check_container_net.sh: |
@@ -1328,6 +1347,147 @@ data:
         echo ${UNKNOWN}
         return
     }
+  check_disk_unmount.sh: |
+    #!/usr/bin/env bash
+    # Exit NONOK when an /etc/fstab UUID entry mentioning "home" is not a mountpoint.
+    source "$(cd "$(dirname $0)/.." || exit 0; pwd)/lib/lib.sh"
+
+    # NOTE(review): reads the /etc/fstab visible to this process - confirm it is the host's.
+    # The tested status is that of xargs, the last command of the pipeline.
+    if mnt_check=$(awk '/^UUID/ && /home/ {print $2}' /etc/fstab | xargs -r -I {} mountpoint {}); then
+        exit "${OK}"
+    else
+        echo "${mnt_check}"
+        exit "${NONOK}"
+    fi
+  check_cgroup_num.sh: |
+    #!/bin/bash
+
+    source "$(cd $(dirname $0)/..;pwd)/lib/lib.sh"
+
+    consistency=0
+    errMsg=""
+
+    # Too many cgroups: a problem is reported once the count exceeds this threshold.
+    cgroupNumThreshold=1000
+
+    output() {  # print the verdict for ${consistency} and exit (0: OK, 1: NONOK, otherwise UNKNOWN)
+      if [[ ${consistency} -eq 0 ]]; then
+        echo "cgroup num is normal"
+        exit ${OK}
+      elif [[ ${consistency} -eq 1 ]]; then
+        echo ${errMsg}
+        exit ${NONOK}
+      else
+        echo ${errMsg}
+        exit ${UNKNOWN}
+      fi
+    }
+
+    # Background kept from the original author - content of /proc/cgroups:
+    #
+    # #subsys_name    hierarchy    num_cgroups    enabled
+    # cpuset    8    268    1
+    # cpu    8    268    1
+    # cpuacct    8    268    1
+    # blkio    5    377    1
+    # memory    2    307    1
+    #
+    # $4=="1" means enabled. NOTE(review): the code below reads the node_num_cgroups metric, not this file - confirm they match.
+
+    # Average node_num_cgroups{cgroup="cpu"} over the series returned by localhost:9199; no series counts as 0.
+    cgroupNum=$(curl -sg 'localhost:9199/api/v1/query?query=node_num_cgroups{cgroup="cpu"}' | \
+    python -c "import sys, json; data=json.load(sys.stdin)['data']
+    resultNum=0
+    count=0
+    for r in data['result']:
+      resultKey=str(r['metric']['cgroup'])
+      resultValue=r['value']
+      if(resultKey == 'cpu' and len(resultValue) == 2):
+        resultNum=resultNum+float(resultValue[1])
+        count=count+1
+    print(resultNum/count if count else 0)"
+    )
+
+    if [[ $(echo "${cgroupNum:-0} > ${cgroupNumThreshold}" | bc) -eq 1 ]]; then  # :-0 keeps the bc expression valid when the query printed nothing
+      consistency=1
+      errMsg="number of cgroup is too many: ${cgroupNum}"
+      output
+    fi
+
+    output
+  check_cgroup_mount.sh: |
+    #!/bin/bash
+
+    source "$(cd $(dirname $0)/..;pwd)/lib/lib.sh"
+
+    consistency=0
+    errMsg=""
+
+    output() {
+      if [[ ${consistency} -eq 0 ]]; then
+        echo "cgroup mount is normal"
+        exit ${OK}
+      elif [[ ${consistency} -eq 1 ]]; then
+        echo ${errMsg}
+        exit ${NONOK}
+      fi
+    }
+
+    # `lssubsys -a` shows
+    # cpuset,cpu,cpuacct
+    # blkio
+    # memory
+    # ...
+    # `lssubsys -am` shows
+    # memory /cgroup/memory
+    # ...
+    # The cgroup counts from `lssubsys -a` and `lssubsys -am` should be consistent!
+    # pipefail (scoped to the subshell) makes $? reflect lssubsys; without it $? is always wc's status.
+    cgroupNumA=$(set -o pipefail; lssubsys -a | wc -l)
+    if [[ ! $? -eq 0 ]]; then
+        echo "Failed to exec lssubsys -a"
+        exit ${UNKNOWN}
+    fi
+
+    cgroupNumB=$(set -o pipefail; lssubsys -am | wc -l)
+    if [[ ! $? -eq 0 ]]; then
+        echo "Failed to exec lssubsys -am"
+        exit ${UNKNOWN}
+    fi
+
+    if [[ "${cgroupNumA}" != "${cgroupNumB}" ]]; then
+      consistency=1
+      errMsg="cgroup mount inconsistent - (lssubsys -a): ${cgroupNumA}, (lssubsys -am): ${cgroupNumB} "
+    fi
+
+    output
+  check_systemd_cgroup_exist.sh: |
+    #!/bin/bash
+    # Verify that the systemd cgroup hierarchy directory is present.
+    #
+    # Output / exit status:
+    #   "<dir> exists"          -> ${OK}
+    #   "<dir> does not exist!" -> ${NONOK}
+    #
+    # OK and NONOK are not defined here; presumably the sourced lib.sh provides them.
+    source "$(cd $(dirname $0)/..;pwd)/lib/lib.sh"
+
+    systemdCgroupDir="/sys/fs/cgroup/systemd"
+
+    # Emit the message given as $1 and terminate with the status given as $2.
+    report() {
+      echo "$1"
+      exit $2
+    }
+
+    # Guard clause: a missing directory is the only failure this script checks for.
+    if [[ ! -d ${systemdCgroupDir} ]]; then
+      report "${systemdCgroupDir} does not exist!" "${NONOK}"
+    fi
+
+    # Reaching this point means the directory test above passed.
+    report "${systemdCgroupDir} exists" "${OK}"
 kind: ConfigMap
 metadata:
   name: node-problem-detector-config
diff --git a/build/chart/ack-node-problem-detector/templates/node-problem-detector.yaml b/build/chart/ack-node-problem-detector/templates/node-problem-detector.yaml
index fed6be73..26a38ed5 100644
--- a/build/chart/ack-node-problem-detector/templates/node-problem-detector.yaml
+++ b/build/chart/ack-node-problem-detector/templates/node-problem-detector.yaml
@@ -35,26 +35,17 @@ spec:
         - --system-log-monitors=/config/kernel-monitor.json,/config/docker-monitor.json
         - --prometheus-address=0.0.0.0
         - --prometheus-port=20257
-#        重复规则, prom中有
-#        - --config.custom-plugin-monitor=/config/ntp-problem-monitor.json
         - --config.custom-plugin-monitor=/config/fd-problem-monitor.json
         - --config.custom-plugin-monitor=/config/irqbalance-monitor.json
-#        - --config.custom-plugin-monitor=/config/public-network-problem-monitor.json
         - --config.custom-plugin-monitor=/config/nvidia-gpu-problem-monitor.json
-#        此规则下架： https://yuque.antfin-inc.com/wl3lgn/project/dwoamd
-#        - --config.custom-plugin-monitor=/config/ps-hang-problem-monitor.json
         - --config.custom-plugin-monitor=/config/pid-pressure-problem-monitor.json
         - --config.custom-plugin-monitor=/config/inodes-problem-monitor.json
         - --config.custom-plugin-monitor=/config/network-problem-monitor.json
         - --config.custom-plugin-monitor=/config/docker-offline-monitor.json
-#        ASI规则测试未通过
-#        - --config.custom-plugin-monitor=/config/system-custom-plugin-monitor.json
-#        - --config.custom-plugin-monitor=/config/kubelet-custom-plugin-monitor.json
-#        - --config.custom-plugin-monitor=/config/container-custom-plugin-monitor.json
-#        以下为特定环境规则
-#        - --config.custom-plugin-monitor=/config/csi-hang-problem-monitor.json
-#        - --custom-plugin-monitors=/config/instance_expired_checker.json
-#        - --custom-plugin-monitors=/config/ram-role-problem-monitor.json
+        - --config.custom-plugin-monitor=/config/system-custom-plugin-monitor.json
+        - --config.custom-plugin-monitor=/config/kubelet-custom-plugin-monitor.json
+        - --config.custom-plugin-monitor=/config/docker-status-monitor.json
+        - --config.custom-plugin-monitor=/config/kubelet-status-monitor.json
         env:
           - name: NODE_NAME
             valueFrom:
@@ -217,6 +208,17 @@ spec:
               path: plugin/check_ip_duplicate.sh
             - key: check_log_directory_permission.sh
               path: plugin/check_log_directory_permission.sh
+            - key: check_disk_unmount.sh
+              path: plugin/check_disk_unmount.sh
+            # Monitors enabled via --config.custom-plugin-monitor need their config and script projected too.
+            - key: docker-status-monitor.json
+              path: docker-status-monitor.json
+            - key: check_docker.sh
+              path: plugin/check_docker.sh
+            - key: kubelet-status-monitor.json
+              path: kubelet-status-monitor.json
+            - key: check_kubelet.sh
+              path: plugin/check_kubelet.sh
 
             - key: lib.sh
               path: lib/lib.sh