-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvalidate-legislators.sh
More file actions
executable file
·165 lines (152 loc) · 5.58 KB
/
validate-legislators.sh
File metadata and controls
executable file
·165 lines (152 loc) · 5.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
#!/usr/bin/env bash
# validate-legislators.sh — Check mo-legislators.md data against official sources
#
# Usage: ./validate-legislators.sh
#
# This script performs basic validation on the legislator reference file:
# 1. Checks that all 34 senate districts are present
# 2. Checks that no room numbers are missing (VERIFY / —)
# 3. Checks that phone numbers match the (573) 751-XXXX format
# 4. Reports the last-verified date and warns if stale (>30 days)
# 5. If senate-mail-merge.csv is present, checks it for leftover VERIFY
#    entries and rows without an email address
# 6. Optionally checks that senate.mo.gov is reachable (the roster itself
#    still has to be compared by hand)
#
# Requirements: bash, GNU grep (for -P), GNU date (for -d), awk, sed,
#               curl (optional for online checks)
# Strict mode: stop on unhandled failures, unset variables and failed
# pipeline stages.
set -o errexit -o nounset -o pipefail

# Input files (resolved against the current directory) and the counters that
# the checks below add to.
FILE='mo-legislators.md'
CSV='senate-mail-merge.csv'
ERRORS=0
WARNINGS=0

# --- Banner ---
printf '%s\n' \
  "==========================================" \
  " MO-Gov Legislator Data Validator" \
  "==========================================" \
  ""

# --- Check files exist ---
# The Markdown reference file is mandatory; give up immediately without it.
[[ -f "$FILE" ]] || {
  printf '%s\n' "ERROR: $FILE not found. Run from the mo-gov repo root."
  exit 1
}
# --- 1. Check all 34 districts are present ---
#######################################
# Verify that every Senate district 1-34 has a table row in $FILE and that
# the total number of numbered rows is exactly 34.
# Globals:   FILE (read), ERRORS (incremented once per missing district and
#            once more when the row count is not 34)
# Arguments: none
# Outputs:   progress and error lines on stdout
#######################################
check_districts() {
  local d found
  echo "1. Checking all 34 Senate districts are present..."
  for ((d = 1; d <= 34; d++)); do
    # -E instead of -P: the pattern needs no PCRE features and -E also works
    # with non-GNU grep.
    if ! grep -qE "^\| $d \|" "$FILE"; then
      echo " ERROR: District $d not found in $FILE"
      # Plain assignment rather than ((ERRORS++)): the post-increment
      # evaluates to 0 on the first error, which is exit status 1 and would
      # terminate the script under set -e.
      ERRORS=$((ERRORS + 1))
    fi
  done
  # grep -c exits 1 when it counts zero lines; "|| true" keeps that from
  # aborting the script.
  found=$(grep -cE '^\| [0-9]+ \|' "$FILE" || true)
  echo " Found $found district rows (expected 34)"
  if [[ "$found" -eq 34 ]]; then
    echo " ✓ All 34 districts present"
  else
    echo " ✗ Missing districts detected"
    ERRORS=$((ERRORS + 1))
  fi
  echo ""
}
check_districts
# --- 2. Check for missing room numbers ---
# Flags numbered table rows in which a later cell is exactly "VERIFY" or an
# em dash, lists them, and adds their count to WARNINGS.
printf '%s\n' "2. Checking for missing room numbers..."
room_pattern='^\| \d+ \|.*\| (VERIFY|—) \|'
# grep -c exits 1 on a zero count, hence the "|| true".
MISSING_ROOMS=$(grep -cP "$room_pattern" "$FILE" || true)
if [[ "$MISSING_ROOMS" -le 0 ]]; then
  printf '%s\n' " ✓ All room numbers populated"
else
  printf '%s\n' " WARNING: $MISSING_ROOMS senators have unverified room numbers:"
  # Columns 2 and 3 of the pipe-delimited row are printed as district and name.
  grep -P "$room_pattern" "$FILE" \
    | awk -F'|' '{print " - District " $2 ": " $3}' \
    | sed 's/^ *//'
  (( WARNINGS += MISSING_ROOMS ))
fi
printf '\n'
# --- 3. Check phone number format ---
#######################################
# Warn about numbered table rows in $FILE that contain no phone number of
# the form "(573) 751-XXXX"; at most the first five offending rows are shown.
# Globals:   FILE (read), WARNINGS (increased by the number of bad rows)
# Arguments: none
# Outputs:   status lines and up to five offending rows on stdout
#######################################
check_phones() {
  local bad_rows bad_count=0
  echo "3. Checking phone number format..."
  # Collect the offending rows once. Either grep exits 1 when it selects
  # nothing, which is an expected outcome here, hence "|| true".
  bad_rows=$(grep -E '^\| [0-9]+ \|' "$FILE" \
    | grep -vE '\(573\) 751-[0-9]{4}' || true)
  if [[ -n "$bad_rows" ]]; then
    bad_count=$(grep -c '' <<<"$bad_rows")
    echo " WARNING: $bad_count rows may have missing or malformed phone numbers"
    # head reads from a here-string, so there is no upstream process that
    # could be killed by SIGPIPE and fail the pipeline under pipefail.
    head -n 5 <<<"$bad_rows"
    WARNINGS=$((WARNINGS + bad_count))
  else
    echo " ✓ All phone numbers match expected format (573) 751-XXXX"
  fi
  echo ""
}
check_phones
# --- 4. Check last-verified date ---
#######################################
# Report the "Last verified" date recorded in $FILE and warn when it is
# missing, cannot be parsed, or is more than 30 days old.
# Globals:   FILE (read), WARNINGS (incremented by one per problem)
# Arguments: none
# Outputs:   status lines on stdout
#######################################
check_freshness() {
  local verified_date verified_epoch now_epoch days_old
  echo "4. Checking data freshness..."

  # Take the text after the first colon that follows "Last verified" on the
  # first line where that text is non-empty. A file without such a line is an
  # expected case, so a non-zero status here must not abort the script.
  verified_date=$(awk '
    match($0, /Last verified[^:]*:[ \t]*/) {
      rest = substr($0, RSTART + RLENGTH)
      if (rest != "") { print rest; exit }
    }
  ' "$FILE" || true)
  # Trim surrounding whitespace with parameter expansion (no xargs, which
  # fails on quote characters in its input).
  verified_date="${verified_date#"${verified_date%%[![:space:]]*}"}"
  verified_date="${verified_date%"${verified_date##*[![:space:]]}"}"

  if [[ -z "$verified_date" ]]; then
    echo " WARNING: No 'Last verified' date found in $FILE"
    WARNINGS=$((WARNINGS + 1))
    echo ""
    return 0
  fi

  echo " Last verified: $verified_date"
  # "date -d" is a GNU extension; when it is unavailable or rejects the
  # string, say so instead of silently skipping the age check.
  if ! verified_epoch=$(date -d "$verified_date" +%s 2>/dev/null); then
    echo " WARNING: Could not parse '$verified_date' as a date; freshness not checked"
    WARNINGS=$((WARNINGS + 1))
    echo ""
    return 0
  fi

  now_epoch=$(date +%s)
  days_old=$(( (now_epoch - verified_epoch) / 86400 ))
  if [[ "$days_old" -gt 30 ]]; then
    echo " WARNING: Data is $days_old days old. Consider re-verifying."
    # Assignment form: ((WARNINGS++)) has exit status 1 when WARNINGS is 0,
    # which would terminate the script under set -e.
    WARNINGS=$((WARNINGS + 1))
  else
    echo " ✓ Data is $days_old days old (within 30-day window)"
  fi
  echo ""
}
check_freshness
# --- 5. Check CSV if present ---
#######################################
# If the mail-merge CSV exists, report its data-row count and warn about
# leftover VERIFY markers and rows whose 13th field is an empty quoted string.
# Globals:   CSV (read), WARNINGS (incremented by one per failed sub-check)
# Arguments: none
# Outputs:   status lines on stdout
#######################################
check_csv() {
  local csv_rows csv_verify csv_no_email
  if [[ ! -f "$CSV" ]]; then
    echo "5. Skipping CSV check ($CSV not found)"
    echo ""
    return 0
  fi

  echo "5. Checking mail merge CSV..."
  # Count records after the header with awk so that a final line without a
  # trailing newline is still counted (wc -l would miss it).
  csv_rows=$(awk 'NR > 1 { n++ } END { print n + 0 }' "$CSV")
  echo " Found $csv_rows data rows in $CSV (expected 34)"

  # Check for VERIFY entries in CSV (counts lines containing the marker;
  # grep -c exits 1 on zero, hence "|| true").
  csv_verify=$(grep -c "VERIFY" "$CSV" || true)
  if [[ "$csv_verify" -gt 0 ]]; then
    echo " WARNING: $csv_verify VERIFY entries remain in CSV"
    # Assignment form: ((WARNINGS++)) has exit status 1 when WARNINGS is 0,
    # which would terminate the script under set -e.
    WARNINGS=$((WARNINGS + 1))
  else
    echo " ✓ No VERIFY entries in CSV"
  fi

  # Check all rows have email.
  # NOTE(review): this splits on every comma, so a quoted field containing a
  # comma shifts the columns; presumably column 13 is the email — confirm
  # against the CSV header.
  csv_no_email=$(awk -F',' 'NR > 1 && index($13, "\"\"") { n++ } END { print n + 0 }' "$CSV")
  if [[ "$csv_no_email" -gt 0 ]]; then
    echo " WARNING: $csv_no_email rows missing email addresses"
    WARNINGS=$((WARNINGS + 1))
  else
    echo " ✓ All rows have email addresses"
  fi
  echo ""
}
check_csv
# --- 6. Optional: Online verification ---
# Probes https://senate.mo.gov/Senators once (10 s limit) and reports the
# HTTP status. Nothing here touches ERRORS or WARNINGS; the roster itself is
# not downloaded or compared.
echo "6. Online verification (optional)..."
if command -v curl &>/dev/null; then
  echo " Checking senate.mo.gov accessibility..."
  # On a failed transfer curl still writes "000" for %{http_code} and then
  # exits non-zero, so echoing another "000" as a fallback would produce
  # "000000". Ignore the exit status and only default an empty result.
  HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 \
    "https://senate.mo.gov/Senators" 2>/dev/null || true)
  HTTP_CODE=${HTTP_CODE:-000}
  if [[ "$HTTP_CODE" == "200" ]]; then
    echo " ✓ senate.mo.gov is accessible (HTTP $HTTP_CODE)"
    echo " TIP: Visit https://senate.mo.gov/Senators/Directory to verify roster"
  elif [[ "$HTTP_CODE" == "403" ]]; then
    echo " ⚠ senate.mo.gov returned 403 (blocked automated access)"
    echo " TIP: Verify manually at https://senate.mo.gov/Senators/Directory"
  else
    echo " ⚠ senate.mo.gov returned HTTP $HTTP_CODE"
  fi
else
  echo " Skipping (curl not available)"
fi
echo ""
# --- Summary ---
# Print both tallies, then one verdict line. The exit status is 1 only when
# at least one error was recorded; warnings alone still exit 0.
cat <<EOF
==========================================
 SUMMARY
==========================================
 Errors: $ERRORS
 Warnings: $WARNINGS

EOF

if (( ERRORS > 0 )); then
  verdict=" ✗ FAIL — Fix errors before printing labels or sending letters."
  status=1
elif (( WARNINGS > 0 )); then
  verdict=" ⚠ PASS with warnings — Review warnings above."
  status=0
else
  verdict=" ✓ ALL CHECKS PASSED — Data is print-ready."
  status=0
fi
printf '%s\n' "$verdict"
exit "$status"