-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathshallow_conv_kernel.py
More file actions
179 lines (147 loc) · 7.24 KB
/
shallow_conv_kernel.py
File metadata and controls
179 lines (147 loc) · 7.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
from sollya import Interval
from metalibm_core.core.ml_complex_formats import ML_Pointer_Format
from metalibm_core.core.ml_formats import (ML_Binary32, ML_Int32, ML_Void)
from metalibm_core.core.ml_operations import (
Variable, Multiplication, Statement, Return, Constant)
from metalibm_core.code_generation.generic_processor import GenericProcessor
from metalibm_core.utility.ml_template import (
DefaultArgTemplate, ML_NewArgTemplate)
from meta_tensor_function import MetaTensorFunction
from tensor_iterator import (
Tensor,
TensorDescriptor,
NDRange, WriteAccessor, ReadAccessor,
IterRange, Sum,
expand_ndrange, tile_ndrange, exchange_loop_order
)
class ShallowConvKernel(MetaTensorFunction):
    """Meta-function generating a shallow 2D convolution kernel.

    The generated function takes an input buffer, a kernel buffer and an
    output buffer (all pointers to ``self.precision`` elements) plus the
    ``w`` and ``h`` sizes, and returns nothing (``ML_Void``).

    NOTE(review): ``tensor_element_emulate`` and the two tensor descriptor
    generators below still implement a matrix-product model (sizes n, m, p);
    they do not obviously match the convolution built by
    ``generate_scheme`` -- confirm before trusting the auto-test results.
    """
    function_name = "shallow_conv_kernel"
    # NOTE(review): generate_scheme declares five input variables while
    # get_ordered_arg_tuple builds six arguments -- confirm which is intended.
    arity = 6

    # kernel dimensions used when the argument template does not provide any
    # (same value as the default of the --kernel-size command line option)
    DEFAULT_KERNEL_SIZE = (3, 3)

    def __init__(self, args=DefaultArgTemplate):
        """Build the meta-function from the argument template ``args``.

        ``args.kernel_size`` gives the kernel dimensions; when the template
        has no such attribute ``DEFAULT_KERNEL_SIZE`` is used instead.
        """
        # list of argument indexes (in generated function argument list)
        # corresponding to output (resp. input) tensors
        # NOTE(review): generate_scheme declares buffer_in, buffer_kernel,
        # buffer_out in that order -- confirm these indexes match what
        # MetaTensorFunction expects.
        output_tensor_indexes = [0]
        input_tensor_indexes = [1, 2]
        MetaTensorFunction.__init__(
            self, output_tensor_indexes, input_tensor_indexes, args)
        # fall back to the class default rather than failing with an
        # AttributeError when the template was built without kernel_size
        self.kernel_size = getattr(
            args, "kernel_size", ShallowConvKernel.DEFAULT_KERNEL_SIZE)
        # patch output format: the generated function returns nothing
        self.implementation.set_output_format(ML_Void)

    @staticmethod
    def get_default_args(**kw):
        """Return the default argument template for ShallowConvKernel.

        The defaults defined here are overloaded with the entries of ``kw``
        before being wrapped into a ``DefaultArgTemplate``.
        """
        default_args = {
            "output_file": "shallow_conv_kernel.c",
            "function_name": "shallow_conv_kernel",
            "test_index_range": [[16, 32], [16, 32], [16, 32]],
            "auto_test_range": [Interval(-1, 1), Interval(-1, 1)],
            "precision": ML_Binary32,
            "target": GenericProcessor.get_target_instance(),
            # required by __init__; absent from the original defaults
            "kernel_size": ShallowConvKernel.DEFAULT_KERNEL_SIZE,
        }
        default_args.update(kw)
        return DefaultArgTemplate(**default_args)

    def generate_scheme(self):
        """Build and return the operation graph of the convolution kernel.

        Declares the function arguments (three buffers, then ``w`` and
        ``h``), describes the computation as an ``NDRange`` over the output
        indexes ``(i, j)`` with a double ``Sum`` over the kernel indexes
        ``(k_w, k_h)``, expands it with ``expand_ndrange`` and returns the
        result followed by an empty ``Return``.
        """
        size_format = ML_Int32

        # tensor storage (pointers to self.precision elements)
        in_storage = self.implementation.add_input_variable(
            "buffer_in", ML_Pointer_Format(self.precision))
        kernel_storage = self.implementation.add_input_variable(
            "buffer_kernel", ML_Pointer_Format(self.precision))
        out_storage = self.implementation.add_input_variable(
            "buffer_out", ML_Pointer_Format(self.precision))

        # tensor sizes
        w = self.implementation.add_input_variable("w", size_format)
        h = self.implementation.add_input_variable("h", size_format)

        # input tensor: dimensions [w, h], strides [1, w]
        tIn = Tensor(in_storage, TensorDescriptor([w, h], [1, w], self.precision))

        # kernel tensor: dense layout, the stride of each dimension is the
        # product of the sizes of all the previous dimensions
        kernel_strides = [1]
        for previous_dim in self.kernel_size[:-1]:
            kernel_strides.append(previous_dim * kernel_strides[-1])
        print("kernel_strides: {}".format(kernel_strides))
        tKernel = Tensor(
            kernel_storage,
            TensorDescriptor(self.kernel_size, kernel_strides, self.precision))

        # output tensor: same dimensions and strides as the input tensor
        tOut = Tensor(out_storage, TensorDescriptor([w, h], [1, w], self.precision))

        index_format = ML_Int32

        # main NDRange description
        i = Variable("i", precision=index_format, var_type=Variable.Local)
        j = Variable("j", precision=index_format, var_type=Variable.Local)
        k_w = Variable("k_w", precision=index_format, var_type=Variable.Local)
        k_h = Variable("k_h", precision=index_format, var_type=Variable.Local)

        # bounds of the kernel index ranges (plain Python integers); unary
        # minus binds tighter than //, so the lower bound is
        # floor(-(size - 1) / 2)
        k_w_low = -(self.kernel_size[0] - 1) // 2
        k_w_high = (self.kernel_size[0] - 1) // 2
        k_h_low = -(self.kernel_size[1] - 1) // 2
        k_h_high = (self.kernel_size[1] - 1) // 2

        # NOTE(review): k_w / k_h take negative values and are used directly
        # as kernel indexes, and the input is read at [i + k_w, j - k_h]
        # without any border handling -- confirm out-of-range accesses are
        # dealt with elsewhere (padding, accessor semantics). The opposite
        # signs applied to k_w and k_h also look unintended -- confirm.
        result = NDRange(
            [IterRange(i, 0, w - 1), IterRange(j, 0, h - 1)],
            WriteAccessor(
                tOut, [i, j],
                Sum(
                    Sum(
                        Multiplication(
                            ReadAccessor(tIn, [i + k_w, j - k_h], self.precision),
                            ReadAccessor(tKernel, [k_w, k_h], self.precision)),
                        IterRange(k_w, k_w_low, k_w_high),
                        precision=self.precision
                    ),
                    IterRange(k_h, k_h_low, k_h_high),
                    precision=self.precision)))

        mdl_scheme = expand_ndrange(result)
        print("mdl_scheme:\n{}".format(mdl_scheme.get_str(depth=None)))
        return Statement(
            mdl_scheme,
            Return()
        )

    def get_ordered_arg_tuple(self, tensor_descriptors, input_tables, output_tables):
        """Return the argument tuple used to call the generated function.

        The tuple holds the two input tables, the output table, then three
        size constants read from the input tensor descriptors.

        NOTE(review): six arguments are built here (sizes n, m, p) while
        generate_scheme declares five (w, h) -- confirm.
        """
        (input_tensor_descriptor_list, _) = tensor_descriptors
        tA_desc = input_tensor_descriptor_list[0]
        tB_desc = input_tensor_descriptor_list[1]
        p = tA_desc.sdim[0]
        n = tA_desc.sdim[1]
        m = tB_desc.sdim[0]
        index_format = ML_Int32
        return (
            input_tables[0], input_tables[1],
            output_tables[0],
            Constant(n, precision=index_format),
            Constant(m, precision=index_format),
            Constant(p, precision=index_format),
        )

    def tensor_element_emulate(self, tensor_descriptors, output_tensor_id, linear_id, input_tables):
        """Return the expected value of one element of the output tensor.

        ``linear_id`` is the linear index of the element in output tensor
        ``output_tensor_id`` (which must be 0). The value is accumulated as
        a dot product between the two input tables.

        NOTE(review): this is a matrix-product emulation; it does not
        obviously model the convolution generated by generate_scheme.
        """
        # this kernel only expects a single output tensor
        assert output_tensor_id == 0
        (input_tensor_descriptor_list, output_tensor_descriptor_list) = tensor_descriptors
        out_nd_index = output_tensor_descriptor_list[0].get_multi_index_from_linear(linear_id)
        j, i = out_nd_index
        tA_desc = input_tensor_descriptor_list[0]
        tB_desc = input_tensor_descriptor_list[1]
        # reduction length, which must agree between both input tensors
        p = tA_desc.sdim[0]
        assert p == tB_desc.sdim[1]
        # explicit left-to-right accumulation (kept as a plain loop so the
        # summation order is unchanged)
        acc = 0
        for k in range(p):
            index_A = tA_desc.get_linear_index_from_multi([k, i])
            index_B = tB_desc.get_linear_index_from_multi([j, k])
            acc += input_tables[0][index_A] * input_tables[1][index_B]
        return acc

    def generate_output_tensor_descriptors(self, random_sizes):
        """ generate list of instance of output tensor descriptors for testing """
        n, m, _ = random_sizes
        # output tensor: dimensions [m, n], strides [1, m]
        tC_desc = TensorDescriptor([m, n], [1, m], self.precision)
        return [tC_desc]

    # NOTE(review): the "innput" spelling is kept on purpose; it presumably
    # matches the hook name looked up by MetaTensorFunction -- confirm before
    # renaming.
    def generate_innput_tensor_descriptors(self, random_sizes):
        """ generate list of instance of input tensor descriptors for testing """
        n, m, p = random_sizes
        # first input tensor: dimensions [p, n], strides [1, p]
        tA_desc = TensorDescriptor([p, n], [1, p], self.precision)
        # second input tensor: dimensions [m, p], strides [1, m]
        tB_desc = TensorDescriptor([m, p], [1, m], self.precision)
        return [tA_desc, tB_desc]
if __name__ == "__main__":
    # command line entry point: build the argument parser from the kernel's
    # default arguments, register the kernel specific options, then generate
    # the implementation
    arg_template = ML_NewArgTemplate(default_arg=ShallowConvKernel.get_default_args())

    # extra arguments
    # NOTE(review): type=eval executes arbitrary Python code taken from the
    # command line; acceptable for a developer tool, but consider
    # ast.literal_eval if this value can ever come from an untrusted source.
    arg_template.get_parser().add_argument(
        "--test-index-range", dest="test_index_range",
        default=[[16, 32], [16, 32], [16, 32]],
        type=eval,
        action="store", help="random range for matrix sizes")
    # kernel dimensions are given as a comma-separated list of integers
    arg_template.get_parser().add_argument(
        "--kernel-size", dest="kernel_size", default=(3, 3),
        type=(lambda s: tuple(map(int, s.split(",")))),
        action="store",
        help="comma-separated convolution kernel dimensions (e.g. 3,3)")

    # argument extraction
    args = arg_template.arg_extraction()

    shallow_conv_kernel = ShallowConvKernel(args)
    shallow_conv_kernel.gen_implementation()