forked from mozilla-it/puppetctl
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcheck_puppet_agent
More file actions
executable file
·158 lines (138 loc) · 4.14 KB
/
check_puppet_agent
File metadata and controls
executable file
·158 lines (138 loc) · 4.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#!/bin/bash
#
# An NRPE plugin that polls the state of puppet on a machine. We want
# to have different notification intervals for different states, so this
# script operates in two different modes.
#
# -c: Check for catalog compile failures only
# -t: Check the last time puppet ran and check for run time errors
#
# Based on code by Alexander Swen <a@swen.nu>
#
# CHANGELOG
# 5/15/2013 bhourigan Initial commit
#
# Locate puppet's state directory. The sub-command used to print a
# configuration value differs between Puppet 2.x and Puppet 3.x, so pick the
# right form from the reported version. Any other version is rejected.
puppetversion=$(puppet -V)
if [[ $puppetversion == 2.* ]]; then
  statedir=$(puppet --configprint statedir)
elif [[ $puppetversion == 3.* ]]; then
  statedir=$(puppet config print statedir)
else
  echo "Sorry, puppet version ${puppetversion:-UNKNOWN} is unsupported by $0"
  exit 1
fi

# Default file locations; each can be overridden on the command line.
statefile="${statedir}/last_run_summary.yaml"
reportfile="${statedir}/last_run_report.yaml"
disablefile="${statedir}/disabled.at"
#######################################
# Print an optional error message followed by the usage text, then exit.
# Output goes to stdout so that it shows up as the plugin output.
# Globals:   statedir (read) - used to show the default file locations
# Arguments: $* - optional error message; printed as "ERROR: <message>"
# Outputs:   usage text on stdout
# Returns:   never returns; exits the script with status 1
#######################################
usage(){
  if [ $# -gt 0 ]; then
    printf 'ERROR: %s\n\n' "$*"
  fi
  printf 'Usage: %s [-c] [-t <threshold>] [-s <statefile>] [-r <reportfile>] [-d <disablefile>]\n\n' "$0"
  # The threshold flag is -t (it is what the option parser accepts); the
  # threshold is compared against an age computed in seconds.
  printf '\t-t last run alert threshold, in seconds\n'
  printf '\t-c check for catalog compilation failures only\n'
  printf '\t-d disable file location (default: %s)\n' "${statedir}/disabled.at"
  printf '\t-r report file location (default: %s)\n' "${statedir}/last_run_report.yaml"
  printf '\t-s state file location (default: %s)\n' "${statedir}/last_run_summary.yaml"
  printf '\n'
  exit 1
}
#######################################
# Emit the plugin's status line and terminate the script.
# Arguments: $1 - message to print on stdout
#            $2 - exit status to terminate with
# Returns:   never returns; exits with the given status
#######################################
result() {
  local message=$1
  local status=$2
  echo "$message"
  exit $status
}
#######################################
# Verify that puppet has run recently enough.
#
# If the disable file exists, the check is skipped: the "Puppet has been
# disabled" line from /etc/motd (with ANSI colour sequences stripped) is
# reported with status 0. Otherwise the last_run timestamp is read from the
# state file and compared against the threshold.
#
# Globals:   disablefile, statefile, threshold (read)
# Outputs:   via result() only when the check does not pass silently
# Returns:   0 when the last run is within the threshold; otherwise exits
#            through result() with status 3 (stale or unreadable last_run)
#            or 0 (puppet disabled)
#######################################
check_last_run_time(){
  local last_run_time now time_since_last comment

  if [ -f "$disablefile" ]; then
    comment=$(grep 'Puppet has been disabled' /etc/motd 2>/dev/null \
      | sed -r "s/\x1B\[([0-9]{1,3}((;[0-9]{1,3})*)?)?[m|K]//g")
    # Never emit an empty status line if motd has no matching entry.
    result "${comment:-Puppet has been disabled}" 0
  fi

  # Parse last_run from the state file; only the first match is used so a
  # duplicated key cannot produce a multi-line value.
  last_run_time=$(awk '/last_run:/ {print $2; exit}' "$statefile")

  # Anything that is not a non-zero run of digits is treated as unreadable
  # instead of letting [ or $(( )) fail with a syntax error.
  case "${last_run_time:-0}" in
    *[!0-9]*)
      result "Can't get last_run from $statefile" 3
      ;;
  esac
  if [ "${last_run_time:-0}" -eq 0 ]; then
    result "Can't get last_run from $statefile" 3
  fi

  now=$(date "+%s")
  # 10# forces base 10 so a value with leading zeros is not read as octal.
  time_since_last=$(( now - 10#$last_run_time ))
  if [ "$time_since_last" -gt "$threshold" ]; then
    result "Last run was ${time_since_last} seconds ago" 3
  fi
}
#######################################
# Report a warning when the last puppet run recorded any failures.
#
# Sums the values of the "failed", "failure" and "failed_to_restart" keys
# found in the state file. Each key is matched as a whole key at the start
# of a line (after indentation) so one counter is never mistaken for
# another.
#
# Globals:   statefile (read)
# Outputs:   via result() only when the sum is greater than zero
# Returns:   0 when no failures were recorded; otherwise exits through
#            result() with status 1
#######################################
check_last_run_errors(){
  local sum_of_fail plural=""

  # A single awk pass adds up all three counters; "+ 0" guarantees a
  # numeric result even when none of the keys is present.
  sum_of_fail=$(awk '
    /^[ \t]*(failed|failure|failed_to_restart):/ { total += $2 }
    END { print total + 0 }
  ' "$statefile")

  if [ "${sum_of_fail:-0}" -gt 0 ]; then
    if [ "$sum_of_fail" -gt 1 ]; then
      plural="s"
    fi
    result "Last run had ${sum_of_fail} error${plural}" 1
  fi
}
#######################################
# Report a critical state when the last run could not retrieve or apply
# its catalog.
#
# Scans the report file for lines containing "Could not retrieve catalog
# from remote server:" or "Failed to apply catalog", takes the text between
# the first pair of double quotes on such a line, and strips everything
# from "on node " onwards.
#
# Globals:   reportfile (read)
# Outputs:   via result() only when a matching line is found
# Returns:   0 when no failure is found; otherwise exits through result()
#            with status 2
#######################################
check_catalog_compile(){
  local failed_catalog

  failed_catalog=$(awk -F '"' '
    /Could not retrieve catalog from remote server:|Failed to apply catalog/ {
      gsub(/on node .*/, "", $2)
      print $2
    }
  ' "$reportfile")

  if [ -n "$failed_catalog" ]; then
    result "$failed_catalog" 2
  fi
}
#######################################
# Report the OK state, naming the puppet version and the catalog's
# configuration version taken from the report file.
#
# The configuration_version value is accepted both double-quoted (the text
# between the first pair of quotes is used) and as a bare scalar such as an
# integer. "UNKNOWN" is printed when no value can be found.
#
# Globals:   reportfile, puppetversion (read)
# Outputs:   status line via result()
# Returns:   never returns; exits through result() with status 0
#######################################
result_ok(){
  local configuration_version

  configuration_version=$(awk '
    /configuration_version:/ {
      if (index($0, "\"") > 0) {
        split($0, parts, "\"")
        print parts[2]
      } else {
        sub(/^.*configuration_version:[ \t]*/, "")
        sub(/[ \t]+$/, "")
        print
      }
    }
  ' "$reportfile")

  result "Puppet agent ${puppetversion} running catalog ${configuration_version:-UNKNOWN}" 0
}
# Parse command-line options.
#   -t <threshold>    last run alert threshold
#   -s <statefile>    override the state file location
#   -d <disablefile>  override the disable file location
#   -r <reportfile>   override the report file location
#   -c                check for catalog compilation failures only
# The leading ":" in the optstring selects silent error reporting, so
# unknown options and missing arguments are handled by the "\?" and ":"
# arms below.
while getopts ":t:s:d:r:c" opt; do
  case "$opt" in
    t)
      threshold=$OPTARG
      ;;
    s)
      statefile=$OPTARG
      ;;
    d)
      disablefile=$OPTARG
      ;;
    r)
      reportfile=$OPTARG
      ;;
    c)
      check_catalog=1
      ;;
    \?)
      usage "Invalid option: -$OPTARG"
      ;;
    :)
      usage "Option -$OPTARG requires an argument."
      ;;
  esac
done
# Both input files must exist before any check can run.
if [ ! -f "$statefile" ]; then
  result "State file $statefile doesn't exist" 3
fi
if [ ! -f "$reportfile" ]; then
  result "Report file $reportfile doesn't exist" 3
fi

# Nagios won't allow us to have different alerting timers for different
# states (we want to see catalog compile failures and recoveries
# immediately, and run time errors daily), so this script will operate
# in two modes which will be defined with two different notification
# intervals in Nagios
if [ "${check_catalog:-0}" -eq 1 ]; then
  # Any threshold at all is rejected in catalog mode, not only values > 1.
  if [ -n "${threshold:-}" ]; then
    usage "Threshold argument not allowed with -c"
  fi
  check_catalog_compile
else
  # The threshold must be a positive integer. Validate it as a string first
  # so a non-numeric value yields a usage error rather than a "[" failure,
  # and report the threshold itself (OPTARG no longer holds it here).
  case "${threshold:-}" in
    ''|*[!0-9]*)
      usage "Invalid time threshold ${threshold:-}"
      ;;
  esac
  if [ "$threshold" -lt 1 ]; then
    usage "Invalid time threshold ${threshold}"
  fi
  check_last_run_time
  check_last_run_errors
fi

# Reached only when the selected checks reported nothing.
result_ok