-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathassignment_solutions_section4.sql
More file actions
203 lines (158 loc) · 7.07 KB
/
assignment_solutions_section4.sql
File metadata and controls
203 lines (158 loc) · 7.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
-- Connect to database (MySQL)
USE maven_advanced_sql;
-- ASSIGNMENT 1: Window function basics
-- View the orders table
SELECT *
FROM orders;
-- View the columns of interest
SELECT customer_id, order_id, order_date, transaction_id
FROM orders
ORDER BY customer_id, transaction_id;
-- For each customer, add a column for transaction number
SELECT customer_id, order_id, order_date, transaction_id,
ROW_NUMBER() OVER(PARTITION BY customer_id ORDER BY transaction_id) AS transaction_num
FROM orders
ORDER BY customer_id, transaction_id;
-- ASSIGNMENT 2: Row Number vs Rank vs Dense Rank
-- View the columns of interest
SELECT order_id, product_id, units
FROM orders;
-- Try ROW_NUMBER to rank the units
SELECT order_id, product_id, units,
ROW_NUMBER() OVER(PARTITION BY order_id ORDER BY units DESC) AS product_rn
FROM orders
ORDER BY order_id, product_rn;
-- For each order, rank the products from most units to fewest units
-- If there's a tie, keep the tie and don't skip to the next number after
SELECT order_id, product_id, units,
DENSE_RANK() OVER(PARTITION BY order_id ORDER BY units DESC) AS product_rank
FROM orders
ORDER BY order_id, product_rank;
-- Check the order id that ends with 44262 from the results preview
SELECT order_id, product_id, units,
DENSE_RANK() OVER(PARTITION BY order_id ORDER BY units DESC) AS product_rank
FROM orders
WHERE order_id LIKE '%44262'
ORDER BY order_id, product_rank;
-- ASSIGNMENT 3: First Value vs Last Value vs Nth Value
-- View the rankings from the last assignment
SELECT order_id, product_id, units,
DENSE_RANK() OVER(PARTITION BY order_id ORDER BY units DESC) AS product_rank
FROM orders
ORDER BY order_id, product_rank;
-- Add a column that contains the 2nd most popular product
-- EDIT: This NTH_VALUE solution doesn't account for ties and returns inaccurate values. The DENSE_RANK solution below is the correct one.
SELECT order_id, product_id, units,
NTH_VALUE(product_id, 2) OVER(PARTITION BY order_id ORDER BY units DESC) AS second_product
FROM orders
ORDER BY order_id, second_product;
-- Return the 2nd most popular product for each order
-- EDIT: This NTH_VALUE solution doesn't account for ties and returns inaccurate values. The DENSE_RANK solution below is the correct one.
SELECT * FROM
(SELECT order_id, product_id, units,
NTH_VALUE(product_id, 2) OVER(PARTITION BY order_id ORDER BY units DESC) AS second_product
FROM orders
ORDER BY order_id, second_product) AS sp -- ORDER BY in subquery is not needed and can be omitted
WHERE product_id = second_product;
-- Alternative using DENSE RANK
-- Add a column that contains the rankings (this DENSE_RANK solution
SELECT order_id, product_id, units,
DENSE_RANK() OVER(PARTITION BY order_id ORDER BY units DESC) AS product_rank
FROM orders
ORDER BY order_id, product_rank;
-- Return the 2nd most popular product for each order
SELECT * FROM
(SELECT order_id, product_id, units,
DENSE_RANK() OVER(PARTITION BY order_id ORDER BY units DESC) AS product_rank
FROM orders
ORDER BY order_id, product_rank) AS pr -- ORDER BY in subquery is not needed and can be omitted
WHERE product_rank = 2;
-- ASSIGNMENT 4: Lead & Lag
-- View the columns of interest
SELECT customer_id, order_id, product_id, transaction_id, order_date, units
FROM orders;
-- For each customer, return the total units within each order
SELECT customer_id, order_id, SUM(units) AS total_units
FROM orders
GROUP BY customer_id, order_id
ORDER BY customer_id;
-- Add on the transaction id to keep track of the order of the orders
SELECT customer_id, order_id, MIN(transaction_id) min_tid, SUM(units) AS total_units
FROM orders
GROUP BY customer_id, order_id
ORDER BY customer_id, min_tid;
-- Turn the query into a CTE and view the columns of interest
WITH my_cte AS (SELECT customer_id, order_id, MIN(transaction_id) min_tid, SUM(units) AS total_units
FROM orders
GROUP BY customer_id, order_id
ORDER BY customer_id, min_tid)
SELECT customer_id, order_id, total_units
FROM my_cte;
-- Create a prior units column
WITH my_cte AS (SELECT customer_id, order_id, MIN(transaction_id) min_tid, SUM(units) AS total_units
FROM orders
GROUP BY customer_id, order_id
ORDER BY customer_id, min_tid)
SELECT customer_id, order_id, total_units,
LAG(total_units) OVER(PARTITION BY customer_id ORDER BY min_tid) AS prior_units
FROM my_cte;
-- For each customer, find the change in units per order over time
-- APPROACH 1: One CTE - more concise approach
WITH my_cte AS (SELECT customer_id, order_id, MIN(transaction_id) AS min_tid, SUM(units) AS total_units
FROM orders
GROUP BY customer_id, order_id
ORDER BY customer_id, min_tid) -- ORDER BY in CTE is not needed and can be omitted
SELECT customer_id, order_id, total_units,
LAG(total_units) OVER(PARTITION BY customer_id ORDER BY min_tid) AS prior_units,
total_units - LAG(total_units) OVER(PARTITION BY customer_id ORDER BY min_tid)
FROM my_cte;
-- APPROACH 2: Multiple CTEs - step-by-step approach
WITH my_cte AS (SELECT customer_id, order_id, MIN(transaction_id) min_tid, SUM(units) AS total_units
FROM orders
GROUP BY customer_id, order_id
ORDER BY customer_id, min_tid), -- ORDER BY in CTE is not needed and can be omitted
prior_cte AS (SELECT customer_id, order_id, total_units,
LAG(total_units) OVER(PARTITION BY customer_id ORDER BY min_tid) AS prior_units
FROM my_cte)
SELECT customer_id, order_id, total_units, prior_units,
total_units - prior_units AS diff_units
FROM prior_cte;
-- ASSIGNMENT 5: NTILE
-- Calculate the total amount spent by each customer
-- View the data needed from the orders table
SELECT customer_id, product_id, units
FROM orders;
-- View the data needed from the products table
SELECT product_id, unit_price
FROM products;
-- Combine the two tables and view the columns of interest
SELECT o.customer_id, o.product_id, o.units, p.unit_price
FROM orders o LEFT JOIN products p
ON o.product_id = p.product_id;
-- Calculate the total spending by each customer and sort the results from highest to lowest
SELECT o.customer_id, SUM(o.units * p.unit_price) AS total_spend
FROM orders o LEFT JOIN products p
ON o.product_id = p.product_id
GROUP BY o.customer_id
ORDER BY total_spend DESC;
-- Turn the query into a CTE and apply the percentile calculation
WITH ts AS (SELECT o.customer_id, SUM(o.units * p.unit_price) AS total_spend
FROM orders o LEFT JOIN products p
ON o.product_id = p.product_id
GROUP BY o.customer_id
ORDER BY total_spend DESC)
SELECT customer_id, total_spend,
NTILE(100) OVER(ORDER BY total_spend DESC) AS spend_pct
FROM ts;
-- Return the top 1% of customers in terms of spending
WITH ts AS (SELECT o.customer_id, SUM(o.units * p.unit_price) AS total_spend
FROM orders o LEFT JOIN products p
ON o.product_id = p.product_id
GROUP BY o.customer_id
ORDER BY total_spend DESC), -- ORDER BY in CTE is not needed and can be omitted
sp AS (SELECT customer_id, total_spend,
NTILE(100) OVER(ORDER BY total_spend DESC) AS spend_pct
FROM ts)
SELECT *
FROM sp
WHERE spend_pct = 1;