summaryrefslogtreecommitdiffstats
path: root/modules/tozt
diff options
context:
space:
mode:
author: Jesse Luehrs <doy@tozt.net> 2020-07-06 01:13:06 -0400
committer: Jesse Luehrs <doy@tozt.net> 2020-07-06 03:09:32 -0400
commit: ab3ea67db930414b23c02f6de47c943834023197 (patch)
tree: 6330e315bc5a13bcef5017a72f5c8ac2e69b6768 /modules/tozt
parent: 976970e0a3d5309960341316250e32fa2cd0b85c (diff)
download: puppet-tozt-ab3ea67db930414b23c02f6de47c943834023197.tar.gz
download: puppet-tozt-ab3ea67db930414b23c02f6de47c943834023197.zip
add network and diskio alerts
Diffstat (limited to 'modules/tozt')
-rw-r--r-- modules/tozt/files/kapacitor/disk.tick 36
-rw-r--r-- modules/tozt/files/kapacitor/net.tick 36
-rw-r--r-- modules/tozt/manifests/tick.pp 6
3 files changed, 78 insertions, 0 deletions
diff --git a/modules/tozt/files/kapacitor/disk.tick b/modules/tozt/files/kapacitor/disk.tick
new file mode 100644
index 0000000..a263a0b
--- /dev/null
+++ b/modules/tozt/files/kapacitor/disk.tick
@@ -0,0 +1,36 @@
+// Per-host alert on the combined read_bytes + write_bytes rate from 'diskio'.
+dbrp "telegraf"."autogen"
+
+// Short alert text; the wording switches once the alert level is back to OK.
+var summary = '''
+{{- if eq .Level "OK" -}}
+{{ index .Tags "host" }} is no longer using excessive disk IO
+{{- else -}}
+{{ index .Tags "host" }} is using excessive disk IO
+{{- end -}}
+'''
+
+// Longer alert text that includes the value of the 'activity' field.
+var body = '''
+{{- if eq .Level "OK" -}}
+{{ index .Tags "host" }} is now only averaging {{ index .Fields "activity" }}B/s of disk usage
+{{- else -}}
+{{ index .Tags "host" }} has been averaging {{ index .Fields "activity" }}B/s of disk usage for the last 10 minutes
+{{- end -}}
+'''
+
+// Add the two byte counters into one 'activity' field and sum it per host.
+var total = stream
+|from().measurement('diskio').groupBy('host')
+|eval(lambda: "read_bytes" + "write_bytes").as('activity')
+|sum('activity').as('activity')
+
+// Differentiate the sum (nonNegative), then average it over a 10m window.
+var rate = total
+|derivative('activity').nonNegative()
+|window().period(10m)
+|mean('activity').as('activity')
+
+// Go critical when the windowed mean exceeds 512*1024.
+rate
+|alert().crit(lambda: "activity" > 512*1024).message(summary).details(body)
diff --git a/modules/tozt/files/kapacitor/net.tick b/modules/tozt/files/kapacitor/net.tick
new file mode 100644
index 0000000..37b5c98
--- /dev/null
+++ b/modules/tozt/files/kapacitor/net.tick
@@ -0,0 +1,36 @@
+// Per-host alert on the combined bytes_recv + bytes_sent rate from 'net'.
+dbrp "telegraf"."autogen"
+
+// Short alert text; the wording switches once the alert level is back to OK.
+var summary = '''
+{{- if eq .Level "OK" -}}
+{{ index .Tags "host" }} is no longer using excessive network
+{{- else -}}
+{{ index .Tags "host" }} is using excessive network
+{{- end -}}
+'''
+
+// Longer alert text that includes the value of the 'activity' field.
+var body = '''
+{{- if eq .Level "OK" -}}
+{{ index .Tags "host" }} is now only averaging {{ index .Fields "activity" }}B/s of network usage
+{{- else -}}
+{{ index .Tags "host" }} has been averaging {{ index .Fields "activity" }}B/s of network usage for the last 10 minutes
+{{- end -}}
+'''
+
+// Add the two byte counters into one 'activity' field and sum it per host.
+var total = stream
+|from().measurement('net').groupBy('host')
+|eval(lambda: "bytes_recv" + "bytes_sent").as('activity')
+|sum('activity').as('activity')
+
+// Differentiate the sum (nonNegative), then average it over a 10m window.
+var rate = total
+|derivative('activity').nonNegative()
+|window().period(10m)
+|mean('activity').as('activity')
+
+// Go critical when the windowed mean exceeds 1024*1024.
+rate
+|alert().crit(lambda: "activity" > 1024*1024).message(summary).details(body)
diff --git a/modules/tozt/manifests/tick.pp b/modules/tozt/manifests/tick.pp
index 39b341d..160fdd5 100644
--- a/modules/tozt/manifests/tick.pp
+++ b/modules/tozt/manifests/tick.pp
@@ -6,6 +6,12 @@ class tozt::tick {
source => 'puppet:///modules/tozt/kapacitor/deadman.tick';
"cpu":
source => 'puppet:///modules/tozt/kapacitor/cpu.tick';
+ "net":
+ source => 'puppet:///modules/tozt/kapacitor/net.tick';
+ # TODO: disk usage is a bit more all over the place, need to figure out a
+ # better way to express this alert
+ # "disk":
+ # source => 'puppet:///modules/tozt/kapacitor/disk.tick';
}
secret {