Skip to content

Commit

Permalink
Merge branch 'V3.7' of https://github.com/goodrain/rainbond into V3.7
Browse files (browse the repository at this point in the history)
  • Loading branch information
barnettZQG committed Aug 14, 2018
2 parents c4c2006 + 47c51ba commit e0a2a41
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 16 deletions.
2 changes: 1 addition & 1 deletion grctl/cmd/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ func NewCmdNode() cli.Command {
table.AddRow("down", v.DownTime)
table.AddRow("connected", v.Connected)
fmt.Println(table)
fmt.Printf("-------------------ervice health-----------------------\n")
fmt.Printf("-------------------service health-----------------------\n")
serviceTable := termtables.CreateTable()
serviceTable.AddHeaders("Title", "Result", "Message")
extractReady(serviceTable, v, "Ready")
Expand Down
57 changes: 42 additions & 15 deletions monitor/prometheus/rules_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"gopkg.in/yaml.v2"
"os"
"github.com/goodrain/rainbond/cmd/monitor/option"

)

type AlertingRulesConfig struct {
Expand All @@ -28,26 +27,13 @@ type RulesConfig struct {

// AlertingRulesManager pairs an alerting rules configuration with the monitor
// option.Config it was constructed with (see NewRulesManager, which fills both
// fields).
//
// NOTE(review): the config field is listed twice in this view. This looks like
// the removed and added sides of a whitespace-only diff line rather than two
// real fields — confirm against the actual file.
type AlertingRulesManager struct {
// RulesConfig is the set of alerting rule groups held by the manager.
RulesConfig *AlertingRulesConfig
// config is the monitor configuration passed to NewRulesManager.
config *option.Config
config *option.Config
}

func NewRulesManager(config *option.Config) *AlertingRulesManager {
a := &AlertingRulesManager{
RulesConfig: &AlertingRulesConfig{
Groups: []*AlertingNameConfig{
&AlertingNameConfig{

Name: "InstanceHealth",
Rules: []*RulesConfig{
&RulesConfig{
Alert: "InstanceDown",
Expr: "up == 0",
For: "3m",
Labels: map[string]string{},
Annotations: map[string]string{"summary": "builder {{$labels.instance}} down", "description":"{{$labels.instance}} of job {{$labels.job}} has been down for more than 3 minutes"},
},
},
},
&AlertingNameConfig{

Name: "BuilderHealth",
Expand Down Expand Up @@ -161,6 +147,47 @@ func NewRulesManager(config *option.Config) *AlertingRulesManager {
},
},
},
&AlertingNameConfig{

Name: "NodeHealth",
Rules: []*RulesConfig{
&RulesConfig{
Alert: "high_cpu_usage_on_node",
Expr: "sum by(instance) (rate(process_cpu_seconds_total[5m])) * 100 > 70",
For: "5m",
Labels: map[string]string{"service": "node_cpu"},
Annotations: map[string]string{"description": "{{ $labels.instance }} is using a LOT of CPU. CPU usage is {{ humanize $value}}%.", "summary": "HIGH CPU USAGE WARNING ON '{{ $labels.instance }}'"},
},
&RulesConfig{
Alert: "high_la_usage_on_node",
Expr: "node_load5 > 5",
For: "5m",
Labels: map[string]string{"service": "node_load5"},
Annotations: map[string]string{"description": "{{ $labels.instance }} has a high load average. Load Average 5m is {{ humanize $value}}.", "summary": "HIGH LOAD AVERAGE WARNING ON '{{ $labels.instance }}'"},
},
&RulesConfig{
Alert: "node_running_out_of_disk_space",
Expr: "(node_filesystem_size{mountpoint='/'} - node_filesystem_free{mountpoint='/'}) * 100 / node_filesystem_size{mountpoint='/'} > 80",
For: "5m",
Labels: map[string]string{"service": "node_running_out_of_disk_space"},
Annotations: map[string]string{"description": "More than 80% of disk used. Disk usage {{ humanize $value }}%.", "summary": "LOW DISK SPACE WARING:NODE '{{ $labels.instance }}"},
},
&RulesConfig{
Alert: "monitoring_service_down",
Expr: "up == 0",
For: "5m",
Labels: map[string]string{"service": "service_down"},
Annotations: map[string]string{"description": "The monitoring service '{{ $labels.job }}' is down.", "summary": "MONITORING SERVICE DOWN WARNING:NODE '{{ $labels.instance }}'"},
},
&RulesConfig{
Alert: "high_memory_usage_on_node",
Expr: "((node_memory_MemTotal - node_memory_MemAvailable) / node_memory_MemTotal) * 100 > 80",
For: "5m",
Labels: map[string]string{"service": "node_memory"},
Annotations: map[string]string{"description": "{{ $labels.instance }} is using a LOT of MEMORY. MEMORY usage is over {{ humanize $value}}%.", "summary": "HIGH MEMORY USAGE WARNING TASK ON '{{ $labels.instance }}'"},
},
},
},
},
},
config: config,
Expand Down

0 comments on commit e0a2a41

Please sign in to comment.