-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathqueryTools.py
More file actions
316 lines (296 loc) · 12.9 KB
/
queryTools.py
File metadata and controls
316 lines (296 loc) · 12.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
import json
import math
relations: dict = {}
allAttributes: set[str] = set()
def rewriteLogicalPlan(plan: dict, reltions1: dict, statistics: dict):
# Build Info for later.
global relations
relations = reltions1.get("relations")
global allAttributes
for key, values in statistics.items():
for value in values.get("V"):
allAttributes.add(f"{key}.{value}")
# Do the rewrites.
plan = selectionPushDown(plan)
mergeSelections(plan)
plan = projectionPushdown(plan)
plan = mergeProjections(plan)
return plan
# QUERY REWRITES PROJECTION AND SELECTION PUSH DOWN
# ALSO MERGES SELECTIONS AND PROJECTIONS
# Finds selections and then pushes them down and marks them as pushed.
def selectionPushDown(plan:dict):
# Select
if (plan.get("op") == "Select"):
# Already pushed
if (plan.get("pushed") == True):
newPlan = plan.copy()
newPlan["child"] = selectionPushDown(plan.get("child"))
return newPlan
# Not yet pushed.
else:
newPlan = pushDownSelection(plan.copy(), plan.get("child"))
return selectionPushDown(newPlan)
# project
elif (plan.get("op") == "Project"):
newPlan = plan.copy()
newPlan["child"] = selectionPushDown(plan.get("child"))
return newPlan
# join
elif (plan.get("op") == "Join"):
newPlan = plan.copy()
newPlan["left"] = selectionPushDown(plan.get("left"))
newPlan["right"] = selectionPushDown(plan.get("right"))
return newPlan
# leaf of the tree.
elif (plan.get("op") == "Scan"):
newPlan = plan.copy()
return newPlan
# pushes the current plan (selection) down the tree.
def pushDownSelection(selection: dict, child: dict):
# Always push past a project.
if (child.get("op") == "Project"):
newPlan = child.copy()
newPlan["child"] = pushDownSelection(selection, child.get("child"))
return newPlan
# Always push past a project
elif (child.get("op") == "Select"):
newPlan = child.copy()
newPlan["child"] = pushDownSelection(selection, child.get("child"))
return newPlan
# Push past only sides
elif (child.get("op") == "Join"):
# Gather info
conditions:list[str] = selection.get("condition")
leftRelations = getRelations(child.get("left"))
rightRelations = getRelations(child.get("right"))
# Conditions in which sides
cond1InLeft = (conditions[0].split(".")[0] in leftRelations) or (conditions[0].split(".")[0] not in relations.keys())
cond2InLeft = (conditions[2].split(".")[0] in leftRelations) or (conditions[2].split(".")[0] not in relations.keys())
cond1InRight = (conditions[0].split(".")[0] in rightRelations) or (conditions[0].split(".")[0] not in relations.keys())
cond2InRight = (conditions[2].split(".")[0] in rightRelations) or (conditions[2].split(".")[0] not in relations.keys())
# can't push past join.
if ((cond1InLeft and not cond2InLeft) or (cond1InRight and not cond2InRight)):
selection["pushed"] = True
selection["child"] = child
return selection
else:
# setup Default
newPlan = child.copy()
newPlan["left"] = child.get("left")
newPlan["right"] = child.get("right")
# can Push past left
if (cond1InLeft and cond2InRight):
newPlan["left"] = pushDownSelection(selection, child.get("left"))
# can Push past right
if (cond1InRight and cond2InRight):
newPlan["right"] = pushDownSelection(selection, child.get("right"))
return newPlan
# Scan is the end.
elif (child.get("op") == "Scan"):
selection["pushed"] = True
selection["child"] = child
return selection
def mergeSelections(queryTree: dict):
# Check if the next is Select if so merge.
if (queryTree.get("op") == "Select"):
child:dict = queryTree.get("child")
# merge predicates
if (child.get("op") == "Select"):
queryTree.get("predicate").extend(["AND"])
queryTree.get("predicate").extend(child.get("predicate"))
queryTree["child"] = child.get("child")
# check for another select below.
mergeSelections(queryTree)
else:
mergeSelections(queryTree.get("child"))
# Check child
elif (queryTree.get("op") == "Project"):
mergeSelections(queryTree.get("child"))
# Check children
elif (queryTree.get("op") == "Join"):
mergeSelections(queryTree.get("left"))
mergeSelections(queryTree.get("right"))
# Return the relation.
elif (queryTree.get("op") == "Scan"):
pass
def projectionPushdown(queryTree: dict):
# Select
if (queryTree.get("op") == "Select"):
newTree = queryTree.copy()
newTree["child"] = projectionPushdown(queryTree.get("child"))
return newTree
# project
elif (queryTree.get("op") == "Project"):
# already been pushed
if (queryTree.get("pushed") == True):
newTree = queryTree.copy()
newTree["child"] = projectionPushdown(queryTree.get("child"))
return newTree
# needs to be pushed.
else:
newTree = pushDownProjection(queryTree.copy(), queryTree.get("child"))
return projectionPushdown(newTree)
# join
elif (queryTree.get("op") == "Join"):
newTree = queryTree.copy()
newTree["left"] = projectionPushdown(queryTree.get("left"))
newTree["right"] = projectionPushdown(queryTree.get("right"))
return newTree
# leaf of the tree.
elif (queryTree.get("op") == "Scan"):
newPlan = queryTree.copy()
return newPlan
def pushDownProjection(projection: dict, child: dict):
# Always push past a project.
if (child.get("op") == "Project"):
newPlan = child.copy()
newPlan["child"] = pushDownProjection(projection, child.get("child"))
return newPlan
# Never push past a selection
elif (child.get("op") == "Select"):
projection["pushed"] = True
projection["child"] = child
return projection
# Assumes above
elif (child.get("op") == "Join"):
# Gather info
# Info for can Push past.
projectionAttributes:list[str] = set(projection.get("attrs"))
joinAttributes = set([x for x in child.get("condition") if x in allAttributes])
# Info for can push past left and/or right.
projectionRelations = set(s.split(".")[0] for s in projection.get("attrs"))
leftRelations = set(getRelations(child.get("left")))
rightRelations = set(getRelations(child.get("right")))
# Can Push past.
# if join uses only attibutes in the projection then can push past.
if (joinAttributes.issubset(projectionAttributes)):
# Push past left
if (projectionRelations.issubset(leftRelations)):
child["left"] = pushDownProjection(projection, child.get("left"))
if (projectionRelations.issubset(rightRelations)):
child["right"] = pushDownProjection(projection, child.get("right"))
return child
# Can't push past.
else:
projection["pushed"] = True
projection["child"] = child
return projection
# Scan is the end.
elif (child.get("op") == "Scan"):
projection["pushed"] = True
projection["child"] = child
return projection
pass
def mergeProjections(queryTree: dict):
# Check if the next is Project if so merge.
if (queryTree.get("op") == "Project"):
child:dict = queryTree.get("child")
# merge predicates
if (child.get("op") == "Project"):
newTree = queryTree.copy()
newTree["attrs"].extend(child.get("attrs"))
newTree["child"] = mergeProjections(child.get("child"))
return newTree
# check for another project.
else:
newTree = queryTree.copy()
newTree["child"] = mergeProjections(queryTree.get("child"))
return newTree
# Check child
elif (queryTree.get("op") == "Select"):
newTree = queryTree.copy()
newTree["child"] = mergeProjections(queryTree.get("child"))
return newTree
# Check children
elif (queryTree.get("op") == "Join"):
newTree = queryTree.copy()
newTree["left"] = mergeProjections(queryTree.get("left"))
newTree["right"] = mergeProjections(queryTree.get("right"))
return newTree
# Return the relation.
elif (queryTree.get("op") == "Scan"):
return queryTree.copy()
# CARDINALITY ESTIMATE AND JOIN ORDER
def getCardinality(logicalPlan: dict, statistics: dict):
# Select only has Equality
if (logicalPlan.get("op") == "Select"):
newPlan = logicalPlan.copy()
cardinality, newPlan["child"] = getCardinality(logicalPlan.get("child"), statistics)
# Get attributes
relationsInPredicate = set(logicalPlan.get("predicate")).intersection(allAttributes)
# get new cardinality.
distinctValues = []
for predRelation in relationsInPredicate:
attributeParts = predRelation.split(".")
distinctValues.append(statistics.get(attributeParts[0]).get("V").get(attributeParts[1]))
if (min(distinctValues) != 0):
cardinality = cardinality/min(distinctValues)
else:
cardinality = 0
newPlan["cardinality"] = math.ceil(cardinality)
return (cardinality, newPlan)
# No reduction in tuples count.
elif (logicalPlan.get("op") == "Project"):
newPlan = logicalPlan.copy()
cardinality, newPlan["child"] = getCardinality(logicalPlan.get("child"), statistics)
newPlan["cardinality"] = math.ceil(cardinality)
return (cardinality, newPlan)
elif (logicalPlan.get("op") == "Join"):
# reoder join
newPlan = logicalPlan.copy()
leftCardinality, leftPlan = getCardinality(logicalPlan.get("left"), statistics)
rightCardinality, rightPlan = getCardinality(logicalPlan.get("right"), statistics)
# Put lower cardinality on the left
if (leftCardinality < rightCardinality):
newPlan["left"] = leftPlan
newPlan["right"] = rightPlan
else:
newPlan["left"] = rightPlan
newPlan["right"] = leftPlan
# Ensure condition order is correct.
cond = logicalPlan.get("condition")
leftRelations = getRelations(newPlan.get("left"))
leftCondition = cond[0].split(".")[0]
if leftCondition not in leftRelations:
newPlan["condition"] = [cond[2], cond[1], cond[0]]
else:
newPlan["condition"] = cond.copy()
# Estimate cardinality of join.
# Find cardonality of the attr for the left side
leftRelations = getRelations(newPlan.get("left"))
relationAttrs = set(f"{leftRelations[0]}.{attr}" for attr in statistics.get(leftRelations[0]).get("V").keys())
leftAttribute = set(newPlan.get("condition")).intersection(set(relationAttrs))
Stat_key = list(leftAttribute)[0].split(".")[1]
# min of cardonality from selections or cardonality of relation attr
distinctLeft = min(leftCardinality, statistics.get(leftRelations[0]).get("V").get(Stat_key))
# Find cardonality of the attr for the right side.
rightRelations = getRelations(newPlan.get("right"))
relationAttrs = set(f"{rightRelations[0]}.{attr}" for attr in statistics.get(rightRelations[0]).get("V").keys())
rightAttribute = set(newPlan.get("condition")).intersection(set(relationAttrs))
Stat_key = list(rightAttribute)[0].split(".")[1]
# min of cardonality from selections or cardonality of relation attr
distinctRight = min(rightCardinality, statistics.get(rightRelations[0]).get("V").get(Stat_key))
cardinality = (leftCardinality*rightCardinality)/max(distinctLeft, distinctRight)
newPlan["cardinality"] = math.ceil(cardinality)
return (cardinality, newPlan)
elif (logicalPlan.get("op") == "Scan"):
cardinality = statistics.get(logicalPlan.get("relation")).get("T")
newPlan = logicalPlan.copy()
newPlan["cardinality"] = math.ceil(cardinality)
return (cardinality, newPlan)
# HELPER FUNCTIONS
# Gets all relations down the query tree.
def getRelations(queryTree: dict) -> list:
# Just get child.
if (queryTree.get("op") == "Select" or queryTree.get("op") == "Project"):
return getRelations(queryTree.get("child"))
# Combine results from left and right.
elif (queryTree.get("op") == "Join"):
leftRelations = getRelations(queryTree.get("left"))
rightRelations = getRelations(queryTree.get("right"))
leftRelations.extend(rightRelations)
return leftRelations
# Return the relation.
elif (queryTree.get("op") == "Scan"):
return [queryTree.get("relation")]