-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathphysicalQueryTools.py
More file actions
203 lines (196 loc) · 7.94 KB
/
physicalQueryTools.py
File metadata and controls
203 lines (196 loc) · 7.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
import math
import queryTools
indexes = set()
allAttributes: set[str] = set()
logicalPlans = []
numLogicalPlans = 0
def createPhysicalPlans(logicPlan: dict, indexesIn: dict[str,list], statistics: dict):
global indexes
indexes = indexesIn
global allAttributes
for key, values in statistics.items():
for value in values.get("V"):
allAttributes.add(f"{key}.{value}")
return _createPhysicalPlans(logicPlan)
def _createPhysicalPlans(logicPlan: dict):
# Select with index or filter?
if (logicPlan.get("op") == "Select"):
next = logicPlan.get("child")
predicates = set(logicPlan.get("predicate")).intersection(allAttributes) #gets rid of "=" "AND" and other values in predicate
# use index Scan if the next is a scan and all predicates are indexes.
if ((next.get("op") == "Scan") and (len(predicates.intersection(indexes)) == len(predicates))):
physPlan = {
"op": "indexScan",
"cardinality": logicPlan.get("cardinality"),
"predicate": logicPlan.get("predicate"),
"relation": next.get("relation")
}
return [physPlan]
# use Filter
else:
childPlans = _createPhysicalPlans(logicPlan.get("child"))
physPlans = []
for child in childPlans:
physPlans.append({
"op": "filter",
"cardinality": logicPlan.get("cardinality"),
"predicate": logicPlan.get("predicate"),
"child": child
})
return physPlans
# Just Project
elif (logicPlan.get("op") == "Project"):
childPlans = _createPhysicalPlans(logicPlan.get("child"))
physPlans = []
for child in childPlans:
physPlans.append({
"op": "project",
"cardinality": logicPlan.get("cardinality"),
"attrs": logicPlan.get("attrs"),
"child": child
})
return physPlans
# Build all the possibilities.
# If right has an index use index scan.
# Otherwise don't use index scan.
elif (logicPlan.get("op") == "Join"):
condition = set(logicPlan.get("condition")).intersection(allAttributes) #gets rid of "=" "AND" and other values in condition
leftPlans = _createPhysicalPlans(logicPlan.get("left"))
rightPlans = _createPhysicalPlans(logicPlan.get("right"))
physPlans = []
# Both attrs in condition have indexes.
# HASHED INDEX join.
# checks if the right relation has an index.
# checks if the right operation is a scan so it can be replaced with the indexScan in execution. (this is a simplification since pushing infromation down the execution is complicated)
rightRealtions = queryTools.getRelations(logicPlan.get("right"))
for relationAttr in iter(condition):
# condition must be both in the indexes and in the rightRelations.
for rightPlan in rightPlans:
if ((relationAttr.split(".")[0] in rightRealtions) and (relationAttr in indexes) and (rightPlan.get("op")) == "scan"):
# For each plan
for leftPlan in leftPlans:
for rightPlan in rightPlans:
physPlans.append({
"op": "hashedIndexJoin",
"cardinality": logicPlan.get("cardinality"),
"condition": logicPlan.get("condition"),
"left": leftPlan,
"right": rightPlan,
})
# short circuit the rest of this logic path.
return physPlans
# No attr in condition has an index
for leftPlan in leftPlans:
for rightPlan in rightPlans:
# Block nested
physPlans.append({
"op": "blockNestedLoopJoin",
"cardinality": logicPlan.get("cardinality"),
"condition": logicPlan.get("condition"),
"left": leftPlan,
"right": rightPlan
})
# Hash Join
physPlans.append({
"op": "hashJoin",
"cardinality": logicPlan.get("cardinality"),
"condition": logicPlan.get("condition"),
"left": leftPlan,
"right": rightPlan
})
return physPlans
# Just Scan
elif (logicPlan.get("op") == "Scan"):
physPlans = []
physPlans.append({
"op": "scan",
"cardinality": logicPlan.get("cardinality"),
"relation": logicPlan.get("relation")
})
return physPlans
statistics = {}
def estimatePhysPlans(physPlans: list[dict], statistics1):
global statistics
statistics = statistics1
estimates = []
for physPlan in physPlans:
estimates.append(estimatePhysPlan(physPlan))
return estimates
# Types of physcial Operators
# 1. scan
# 2. indexScan
# 3. filter
# 4. project
# 5. hashedIndexJoin
# 6. blockNestedLoopJoin
# 7. hashJoin
# Returns total io aswell as adds io to each node for it's estimated io.
def estimatePhysPlan(physPlan: dict):
# End of tree
if (physPlan.get("op") == "scan"):
# Cost = B(R)
io = statistics.get(physPlan.get("relation")).get("B")
physPlan["io"] = io
return io
# End of tree
elif (physPlan.get("op") == "indexScan"):
# BucketAccess = 1 + o but assume it's just 1
# dataFetch = B(R)/V(R,a)
# Cost = BucketAccess+dataFetch
B = statistics.get(physPlan.get("relation")).get("B")
# Get the indexes in the predicate.
index = set(physPlan.get("predicate")).intersection(indexes)
S = 1/statistics.get(physPlan.get("relation")).get("V").get(index.pop().split(".")[1])
dataFetch = S*B
physPlan["io"] = 1+dataFetch
return 1 + dataFetch
elif (physPlan.get("op") == "filter"):
# filter can have multiple condition attributes
# Cost = B(R)/multall(V(R,a))
totalIO = estimatePhysPlan(physPlan.get("child"))
physPlan["io"] = totalIO
# return totalIO + io
return totalIO
elif (physPlan.get("op") == "project"):
# Cost = B(R)
totalIO = estimatePhysPlan(physPlan.get("child"))
io = physPlan.get("child").get("io")
physPlan["io"] = io
# return totalIO + io
return io
elif (physPlan.get("op") == "hashedIndexJoin"):
# Clustered index
# dataFetch = 1
# Cost = B(R)+T(R)*dataFetch
totalIOLeft = estimatePhysPlan(physPlan.get("left"))
totalIORight = estimatePhysPlan(physPlan.get("right"))
# Bleft = physPlan.get("left").get("io")
Bleft = totalIOLeft
Tleft = physPlan.get("left").get("cardinality")
# for each tuple on the left
# Lookup 1
# dataFetch 1
io = Bleft+Tleft*2
physPlan["io"] = io
return io
elif (physPlan.get("op") == "blockNestedLoopJoin"):
# slides formula: B(R)+B(R)/(M-2)*B(S)
# lab3 formula: # Cost(outer) + |outer| × B(inner)
# Assume M = 3
# RESULT
# Cost = B(R)+T(R)*B(S)
totalIOLeft = estimatePhysPlan(physPlan.get("left"))
totalIORight = estimatePhysPlan(physPlan.get("right"))
Bleft = totalIOLeft
Tleft = physPlan.get("left").get("cardinality")
Bright = physPlan.get("right").get("io")
io = Bleft+Tleft*totalIORight
physPlan["io"] = io
return io
elif (physPlan.get("op") == "hashJoin"):
# Cost = B(R)+B(S)
totalIOLeft = estimatePhysPlan(physPlan.get("left"))
totalIORight = estimatePhysPlan(physPlan.get("right"))
io = totalIOLeft+totalIORight
physPlan["io"] = io
return io