-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathschool_data_queries.sql
More file actions
411 lines (363 loc) · 11.4 KB
/
school_data_queries.sql
File metadata and controls
411 lines (363 loc) · 11.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
-- School Quarter 3 Analysis
-- Explore grades overall to create familiarity
-- Count of grades received (A=4, B=3, etc.)
SELECT
grade_point_used,
count(*) AS count,
round(count(*)/(
SELECT count(*)
FROM grades AS g
)*100, 2) AS perc
FROM grades
GROUP BY
grade_point_used
ORDER BY grade_point_used DESC;
-- Count of grades received using +/- grades (e.g., B+ is 3.3, not 3)
SELECT
grade_point_dec,
count(*) AS count,
round(count(*)/(
SELECT count(*)
FROM grades AS g
)*100, 2) AS perc
FROM grades
GROUP BY
grade_point_dec
ORDER BY grade_point_dec DESC;
-- Count of grades received (A=4, B=3, etc.) in core classes
SELECT
grade_point_used AS grade_for_core,
count(*) AS count,
round(count(*)/(
SELECT count(*)
FROM grades AS g
JOIN courses AS c USING(course_id, course_subject, course_title)
WHERE core_req=1
)*100, 2) AS perc
FROM grades AS g
JOIN courses AS c USING(course_id, course_subject, course_title)
WHERE core_req=1
GROUP BY
grade_point_used
ORDER BY grade_point_used DESC;
-- Count of grades received (A=4, B=3, etc.) in non-core classes
SELECT
grade_point_used AS grade_non_core,
count(*) AS count,
round(count(*)/(
-- Subquery to find percentage of students receiving that grade (only for core classes)
SELECT count(*)
FROM grades AS g
LEFT JOIN courses AS c USING(course_id, course_subject, course_title)
WHERE core_req != 1
)*100, 2) AS perc
FROM grades AS g
LEFT JOIN courses AS c USING(course_id, course_subject, course_title)
WHERE core_req != 1
GROUP BY
grade_point_used
ORDER BY grade_point_used DESC;
-- Average grade (using +/-) by department
SELECT
course_subject,
round(avg(grade_point_dec), 2) AS average
FROM grades
GROUP BY course_subject
ORDER BY average DESC;
-- Average grade (using +/-) in only core classes by department
SELECT
course_subject,
round(avg(grade_point_dec), 2) AS average
FROM grades
LEFT JOIN courses AS c USING(course_id, course_title, course_subject)
WHERE
c.core_req=1
GROUP BY course_subject
ORDER BY average DESC;
-- Average grade (using +/-) in only core classes by department(but not including ACS in English)
SELECT
course_subject,
round(avg(grade_point_dec), 2) AS average
FROM grades
LEFT JOIN courses AS c USING(course_id, course_title, course_subject)
WHERE
c.core_req=1
AND course_title != 'Adv Comm Skills'
GROUP BY course_subject
ORDER BY average DESC;
-- Average grade by course, identifying the 15 hardest courses (only including courses with at least 20 students) -- included in tables at end of report
SELECT
course_title,
COUNT(*) AS student_count,
ROUND(AVG(absence_perc), 2) AS avg_absence,
ROUND(AVG((ss_absences)*100/18), 2) AS avg_ss_abs,
ROUND(AVG(grade_point_dec), 2) AS avg_grade,
ROUND(AVG(pass_or_fail), 2) AS pass_rate,
ROUND(AVG(c_or_higher), 2) AS c_or_higher_rate
FROM all_student_data
GROUP BY course_title
HAVING student_count>=20
ORDER BY avg_grade ASC
LIMIT 15;
-- Identifying students who aren't listed on the GPA table -- possible non-diploma seeking students?
SELECT
DISTINCT student_id,
first_name,
last_name,
grade_level
FROM grades
LEFT JOIN gpa USING(student_id)
LEFT JOIN students USING(student_id)
WHERE gpa.student_id IS NULL;
-- Create table with all relevant information for R analysis
CREATE TABLE all_student_data AS (
SELECT
g.student_id,
g.teacher_id,
g.grade,
g.grade_point_dec,
g.grade_point_used,
g.course_subject,
g.course_title,
g.period,
-- Retrieve absences by period; default to 0 to account for students with no absences
COALESCE(CASE g.period
WHEN 1 THEN a.period_1
WHEN 2 THEN a.period_2
WHEN 3 THEN a.period_3
WHEN 4 THEN a.period_4
WHEN 5 THEN a.period_5
WHEN 6 THEN a.period_6
WHEN 7 THEN a.period_7
WHEN 8 THEN a.period_8
ELSE NULL
END, 0) AS absences,
-- Retrieve tardies by period; default to 0 to account for students with no tardies
COALESCE(CASE g.period
WHEN 1 THEN tar.period_1
WHEN 2 THEN tar.period_2
WHEN 3 THEN tar.period_3
WHEN 4 THEN tar.period_4
WHEN 5 THEN tar.period_5
WHEN 6 THEN tar.period_6
WHEN 7 THEN tar.period_7
WHEN 8 THEN tar.period_8
ELSE 0
END, 0) AS tardies,
COALESCE(a.support_seminar, 0) AS ss_absences,
gpa.gpa,
gpa.credits_attempted,
gpa.credits_completed,
s.gender,
s.gender_male,
s.gender_female,
s.gender_nonbinary,
s.grade_level,
-- Demographic fields; default to 0 for Boolean logic (if unlisted, not part of program)
COALESCE(e.ell, 0) AS ell,
COALESCE(s504.sec_504, 0) AS sec_504,
COALESCE(sped.sped, 0) AS sped,
COALESCE(tag.tag, 0) AS tag,
COALESCE(tr.transfer, 0) AS transfer
FROM grades AS g
LEFT JOIN absences AS a USING(student_id)
LEFT JOIN tardies as tar USING(student_id)
LEFT JOIN ell as e USING(student_id)
LEFT JOIN gpa USING(student_id)
LEFT JOIN sec_504 as s504 USING(student_id)
LEFT JOIN sped USING(student_id)
LEFT JOIN students as s USING(student_id)
LEFT JOIN tag as tag USING(student_id)
LEFT JOIN transfer as tr USING(student_id)
);
-- Adding columns for percentages of absences, tardies, and credit completion; adding pass/fail and A-C/D-F
ALTER TABLE all_student_data
ADD COLUMN absence_perc DECIMAL(3,0)
GENERATED ALWAYS AS ((absences/20)*100) STORED,
ADD COLUMN tardy_perc DECIMAL (3,0)
GENERATED ALWAYS AS ((tardies/20)*100) STORED,
ADD COLUMN ss_abs_perc DECIMAL(4,1)
GENERATED ALWAYS AS ((ss_absences/18)*100) STORED,
ADD COLUMN credit_perc DECIMAL(4,1)
GENERATED ALWAYS AS ((credits_completed/credits_attempted)*100) STORED,
ADD COLUMN pass_or_fail INTEGER
GENERATED ALWAYS AS
(CASE
WHEN grade_point_used>=1 THEN 1
ELSE 0
END) STORED,
ADD COLUMN c_or_higher INTEGER
GENERATED ALWAYS AS
(CASE
WHEN grade_point_used>=2 THEN 1
ELSE 0
END) STORED,
ADD COLUMN absence_rate VARCHAR(10)
GENERATED ALWAYS AS
(CASE
WHEN absence_perc BETWEEN 0 AND 9.9 THEN 'low'
WHEN absence_perc BETWEEN 10 AND 19.9 THEN 'medium'
WHEN absence_perc BETWEEN 20 AND 39.9 THEN 'high'
WHEN absence_perc >=40 THEN 'very high'
ELSE 'error'
END) STORED;
-- Creating table that includes data indicating core/non-core for R analysis
CREATE TABLE all_with_core AS (
SELECT *
FROM all_student_data
LEFT JOIN courses USING(course_title, course_subject)
);
-- Using newly generated table for further exploration prior to R analysis
-- Average grade/pass percent by absence percentage
SELECT
absence_perc,
round(avg(grade_point_dec), 2) AS avg_grade,
round(avg(pass_or_fail), 2) AS pass_perc,
count(*) AS count
FROM all_student_data
GROUP BY absence_perc
ORDER BY absence_perc ASC;
-- Grades/pass percentage as grouped by absence rates for all classes (with ROLLUP)
SELECT
absence_rate,
round(avg(absences), 2) as avg_classes_missed,
count(*) AS count,
round(count(*)/(
-- subquery to tally total number of rows to calculate percentage
SELECT count(*) FROM all_student_data
)*100, 1) AS perc_of_students,
round(avg(absence_perc), 2) AS avg_absence_perc,
round(avg(grade_point_dec), 2) AS avg_grade,
round(avg(pass_or_fail)* 100, 1) AS pass_perc
FROM all_student_data
GROUP BY absence_rate WITH ROLLUP
ORDER BY perc_of_students DESC;
-- Grades/pass percentage as grouped by absence rates for core classes (with ROLLUP)
SELECT
absence_rate,
round(avg(absences), 2) as avg_classes_missed,
count(*) AS count,
round(count(*)/(
-- subquery to tally total number of rows in core classes to calculate percentage
SELECT count(*)
FROM all_student_data
LEFT JOIN courses USING(course_title)
WHERE core_req=1
)*100, 1) AS perc_of_students,
round(avg(absence_perc), 2) AS avg_absence_perc,
round(avg(grade_point_dec), 2) AS avg_grade,
round(avg(pass_or_fail)* 100, 1) AS pass_perc,
round(avg(c_or_higher)* 100, 1) AS c_or_higher_perc
FROM all_student_data
LEFT JOIN courses USING(course_title)
WHERE core_req=1
GROUP BY absence_rate WITH ROLLUP
ORDER BY perc_of_students DESC;
-- Grades/pass percentage as grouped by absence rates for non-core classes (with ROLLUP)
SELECT
absence_rate,
round(avg(absences), 2) as avg_classes_missed,
count(*) AS count,
round(count(*)/(
-- subquery to tally total number of rows in non-core classes to calculate percentage
SELECT count(*)
FROM all_student_data
LEFT JOIN courses USING(course_title)
WHERE core_req=0
)*100, 1) AS perc_of_students,
round(avg(absence_perc), 2) AS avg_absence_perc,
round(avg(grade_point_dec), 2) AS avg_grade,
round(avg(pass_or_fail)* 100, 1) AS pass_perc,
round(avg(c_or_higher)* 100, 1) AS c_or_higher_perc
FROM all_student_data
LEFT JOIN courses USING(course_title)
WHERE core_req=0
GROUP BY absence_rate WITH ROLLUP
ORDER BY perc_of_students DESC;
-- GPA as correlated with support seminar absences
SELECT
ss_abs_rate,
count(*) AS count,
round(avg(gpa), 2) AS avg_gpa
FROM (
SELECT
gpa,
student_id,
CASE
WHEN support_seminar<=1 THEN 'low'
WHEN support_seminar<=3 THEN 'middle'
WHEN support_seminar<=7 THEN 'high'
ELSE 'very high'
END AS ss_abs_rate
FROM gpa
LEFT JOIN absences USING(student_id)
) AS ss_abs_cat
GROUP BY ss_abs_rate
ORDER BY count DESC;
-- Absences/grades as grouped by demographic factors (for table "Summary of Demographic Factors")
-- Utilizing stored procedure in order to reduce unnecessary repetition
DROP PROCEDURE IF EXISTS demographics;
DELIMITER $$
CREATE PROCEDURE demographics(IN demographic_column VARCHAR(20))
BEGIN
SET @demog_query = CONCAT(
'SELECT ',
demographic_column, ', ',
'COUNT(*) AS count, ',
'ROUND(COUNT(*) / 8350.0, 2) AS perc_of_students, ',
'ROUND(AVG(absence_perc), 2) AS avg_absence, ',
'ROUND(AVG((ss_absences)*100/18), 2) AS avg_ss_abs, ',
'ROUND(AVG(grade_point_dec), 2) AS avg_grade, ',
'ROUND(AVG(pass_or_fail), 2) AS pass_rate, ',
'ROUND(AVG(c_or_higher), 2) AS c_or_higher_rate ',
'FROM all_student_data ',
'GROUP BY ', demographic_column
);
PREPARE demog_stmt FROM @demog_query;
EXECUTE demog_stmt;
DEALLOCATE PREPARE demog_stmt;
END $$
DELIMITER ;
CALL demographics('sped');
CALL demographics('sec_504');
CALL demographics('ell');
CALL demographics('tag');
CALL demographics('transfer');
CALL demographics('gender');
-- Absences/grades as grouped by school goupings (for tables at the end of the report)
-- Grade Level
CALL demographics('grade_level');
-- Department for all classes (with ROLLUP)
SELECT
course_subject,
COUNT(*) AS count,
ROUND(COUNT(*) / 8350.0, 2) AS perc_of_students,
ROUND(AVG(absence_perc), 2) AS avg_absence,
ROUND(AVG(tardy_perc), 2) AS avg_tardies,
ROUND(AVG((ss_absences)*100/18), 2) AS avg_ss_abs,
ROUND(AVG(grade_point_dec), 2) AS avg_grade,
ROUND(AVG(pass_or_fail), 2) AS pass_rate,
ROUND(AVG(c_or_higher), 2) AS c_or_higher_rate
FROM all_student_data
GROUP BY course_subject WITH ROLLUP;
-- Department for core classes (with ROLLUP)
SELECT
course_subject,
COUNT(*) AS count,
ROUND(COUNT(*)/(
-- subquery to tally total number of rows in core classes to calculate percentage
SELECT count(*)
FROM all_student_data
LEFT JOIN courses USING(course_title)
WHERE core_req=1
), 2) AS perc_of_students,
ROUND(AVG(absence_perc), 2) AS avg_absence,
ROUND(AVG((ss_absences)*100/18), 2) AS avg_ss_abs,
ROUND(AVG(grade_point_dec), 2) AS avg_grade,
ROUND(AVG(pass_or_fail), 2) AS pass_rate,
ROUND(AVG(c_or_higher), 2) AS c_or_higher_rate
FROM all_student_data
LEFT JOIN courses USING(course_title, course_subject)
WHERE core_req=1
GROUP BY course_subject WITH ROLLUP
ORDER BY avg_grade;