tensor-kernel-codegen/meta_tensor_function.py at master · metalibm/tensor-kernel-codegen · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
import random

import sollya
from sollya import Interval, inf, sup

from metalibm_core.core.ml_formats import ML_Int32, ML_UInt32, ML_Void, ML_Int64
from metalibm_core.core.array_function import (
    ML_ArrayFunction,
    generate_1d_table, generate_2d_table
)
from metalibm_core.core.random_gen import get_precision_rng
from metalibm_core.core.special_values import FP_QNaN

from metalibm_core.utility.ml_template import DefaultArgTemplate
from metalibm_core.core.ml_table import ML_NewTable

from metalibm_core.code_generation.code_function import (
    CodeFunction, FunctionGroup
)
from metalibm_core.code_generation.generator_utility import (
    FunctionOperator, TemplateOperatorFormat,
    FO_Arg,
)
from metalibm_core.core.ml_operations import (
    Variable, Constant,
    Loop, ReferenceAssign, Statement,
    TableLoad,
    FunctionObject,
    Return, ConditionBlock,
    Addition,
)

class MetaTensorFunction(ML_ArrayFunction):
    def __init__(self,
                 output_tensor_args_indexes,
                 input_tensor_args_indexes,
                 args=DefaultArgTemplate):
        ML_ArrayFunction.__init__(self, args)
        self.output_tensor_args_indexes = output_tensor_args_indexes
        self.input_tensor_args_indexes = input_tensor_args_indexes

    def generate_output_tensor_descriptors(self, random_sizes):
        """ generate list of instance of output tensor descriptors for testing """
        raise NotImplementedError
    def generate_innput_tensor_descriptors(self, random_sizes):
        """ generate list of instance of input tensor descriptors for testing """
        raise NotImplementedError

    def generate_random_sizes(self):
        return [random.randrange(lo, hi+1) for lo,hi in self.test_index_range]

    def generate_test_tables(self, test_num, test_ranges=[Interval(-1.0, 1.0)]):
        """ Generate inputs and output table to be shared between auto test
            and max_error tests """
        random_sizes = self.generate_random_sizes()
        output_tensor_descriptor_list = self.generate_output_tensor_descriptors(random_sizes)
        input_tensor_descriptor_list = self.generate_innput_tensor_descriptors(random_sizes)

        index_range = self.test_index_range

        # number of arrays expected as inputs for tested_function
        NUM_INPUT_ARRAY = len(input_tensor_descriptor_list)
        # position of the input array in tested_function operands (generally
        # equals to 1 as to 0-th input is often the destination array)
        INPUT_INDEX_OFFSET = 1


        # concatenating standard test array at the beginning of randomly
        # generated array
        INPUT_ARRAY_SIZE = [td.get_bounding_size() for td in input_tensor_descriptor_list]

        # TODO/FIXME: implement proper input range depending on input index
        # assuming a single input array
        input_precisions = [td.scalar_format for td in input_tensor_descriptor_list]
        rng_map = [get_precision_rng(precision, inf(test_range), sup(test_range)) for precision, test_range in zip(input_precisions, test_ranges)]

        # generated table of inputs
        input_tables = [
            generate_1d_table(
                INPUT_ARRAY_SIZE[table_id],
                input_precisions[table_id],
                self.uniquify_name("input_table_arg%d" % table_id),
                value_gen=(lambda _: input_precisions[table_id].round_sollya_object(rng_map[table_id].get_new_value(), sollya.RN))
            ) for table_id in range(NUM_INPUT_ARRAY)
        ]

        OUTPUT_ARRAY_SIZE = [td.get_bounding_size() for td in output_tensor_descriptor_list]
        OUTPUT_PRECISION = [td.scalar_format for td in output_tensor_descriptor_list]
        NUM_OUTPUT_ARRAY = len(output_tensor_descriptor_list)

        # generate output_array
        output_tables = [generate_1d_table(
            OUTPUT_ARRAY_SIZE[table_id],
            OUTPUT_PRECISION[table_id],
            self.uniquify_name("output_array_%d" % table_id),
            const=False,
            #value_gen=(lambda _: FP_QNaN(self.precision))
            value_gen=(lambda _: 0)
        ) for table_id in range(NUM_OUTPUT_ARRAY)]
        tensor_descriptors = (input_tensor_descriptor_list, output_tensor_descriptor_list)
        return tensor_descriptors, input_tables, output_tables

    def generate_test_wrapper(self, tensor_descriptors, input_tables, output_tables):
        auto_test = CodeFunction("test_wrapper", output_format = ML_Int32)

        tested_function    = self.implementation.get_function_object()
        function_name      = self.implementation.get_name()

        failure_report_op       = FunctionOperator("report_failure")
        failure_report_function = FunctionObject("report_failure", [], ML_Void, failure_report_op)

        printf_success_op = FunctionOperator("printf", arg_map = {0: "\"test successful %s\\n\"" % function_name}, void_function = True, require_header=["stdio.h"])
        printf_success_function = FunctionObject("printf", [], ML_Void, printf_success_op)

        # accumulate element number
        acc_num = Variable("acc_num", precision=ML_Int64, var_type=Variable.Local)

        test_loop = self.get_tensor_test_wrapper(
            tested_function,
            tensor_descriptors,
            input_tables,
            output_tables,
            acc_num,
            self.generate_tensor_check_loop)

        # common test scheme between scalar and vector functions
        test_scheme = Statement(
          test_loop,
          printf_success_function(),
          Return(Constant(0, precision = ML_Int32))
        )
        auto_test.set_scheme(test_scheme)
        return FunctionGroup([auto_test])


    def get_ordered_arg_tuple(self, tensor_descriptors, input_tables, output_tables):
        """ generate an ordered tuple of argument for the current function
            assuming
            - tensor_descriptors is (input_tensor_descriptor_list, output_tensor_descriptor_list)
            - input tensors are listed in input_tables,
            - output tensors are listed in output_tables
        """
        # must be overloaded by actual function
        raise NotImplementedError

    def get_tensor_test_wrapper(
            self,
            tested_function,
            tensor_descriptors,
            input_tables, output_tables,
            acc_num,
            post_statement_generator,
            NUM_INPUT_ARRAY=1):
        """ generate a test loop for multi-array tests
             @param test_num number of elementary array tests to be executed
             @param tested_function FunctionObject to be tested
             @param table_size_offset_array ML_NewTable object containing
                    (table-size, offset) pairs for multi-array testing
             @param input_table ML_NewTable containing multi-array test inputs
             @param output_table ML_NewTable containing multi-array test outputs
             @param post_statement_generator is generator used to generate
                    a statement executed at the end of the test of one of the
                    arrays of the multi-test. It expects 6 arguments:
                    (input_tables, output_array, table_size_offset_array,
                     array_offset, array_len, test_id)
             @param printf_function FunctionObject to print error case
        """
        array_len = Variable("len", precision=ML_UInt32, var_type=Variable.Local)


        def pointer_add(table_addr, offset):
            pointer_format = table_addr.get_precision_as_pointer_format()
            return Addition(table_addr, offset, precision=pointer_format)

        array_inputs    = tuple(input_tables[in_id] for in_id in range(NUM_INPUT_ARRAY))
        function_call = tested_function(
            *(self.get_ordered_arg_tuple(tensor_descriptors, input_tables, output_tables)))

        post_statement = post_statement_generator(
                            tensor_descriptors,
                            input_tables, output_tables)

        test_statement = Statement(
            function_call,
            post_statement,
        )

        return test_statement


    def get_printf_error_detail_fct(self, tensor_descriptor):
        output_format = tensor_descriptor.scalar_format
        # result is the second argument of the function (after erroenous element index)
        result_arg_id = 1
        # build the format string for result/expected display
        result_display_format = output_format.get_display_format().format_string
        result_display_vars = output_format.get_display_format().pre_process_fct("{%d}" % result_arg_id)

        template = ("printf(\"error[%u]: {fct_name},"
                    " result is {result_display_format} "
                    "vs expected \""
                    ", {{0}}, {result_display_vars}"
                    ")").format(
                        fct_name=self.function_name,
                        result_display_format=result_display_format,
                        result_display_vars=result_display_vars,
                    )
        printf_op = TemplateOperatorFormat(template, void_function=True, arity=(1+1), require_header=["stdio.h"])
        printf_error_detail_function = FunctionObject("printf", [ML_UInt32] + [output_format], ML_Void, printf_op)
        return printf_error_detail_function

    def generate_tensor_check_loop(self, tensor_descriptors, input_tables, output_tables):
        # unpack tensor descriptors tuple
        (input_tensor_descriptor_list, output_tensor_descriptor_list) = tensor_descriptors
        # internal array iterator index
        vj = Variable("j", precision=ML_UInt32, var_type=Variable.Local)

        printf_error_detail_function = self.get_printf_error_detail_fct(output_tensor_descriptor_list[0])

        NUM_INPUT_ARRAY = len(input_tables)

        # generate the expected table for the whole multi-array
        expected_tables = self.generate_expected_table(tensor_descriptors, input_tables)

        # global statement to list all checks
        check_statement = Statement()

        # implement check for each output tensor
        for out_id, out_td in enumerate(output_tensor_descriptor_list):
            # expected values for the (vj)-th entry of the sub-array
            expected_values = [TableLoad(expected_tables[out_id], vj, i) for i in range(self.accuracy.get_num_output_value())]
            # local result for the (vj)-th entry of the sub-array
            local_result = TableLoad(output_tables[out_id], vj)

            array_len = out_td.get_bounding_size()

            if self.break_error:
                return_statement_break = Statement(
                    printf_error_detail_function(*((vj,) + (local_result,))),
                    self.accuracy.get_output_print_call(self.function_name, output_values)
                )
            else:
                return_statement_break = Statement(
                    printf_error_detail_function(*((vj,) + (local_result,))),
                    self.accuracy.get_output_print_call(self.function_name, expected_values),
                    Return(Constant(1, precision = ML_Int32))
                )
            check_array_loop = Loop(
                ReferenceAssign(vj, 0),
                vj < array_len,
                Statement(
                    ConditionBlock(
                        self.accuracy.get_output_check_test(
                            local_result,
                            expected_values
                        ),
                        return_statement_break
                    ),
                    ReferenceAssign(vj, vj+1),
                )
            )
            check_statement.add(check_array_loop)
        return check_statement

    def tensor_element_emulate(self, tensor_descriptors, output_tensor_id, element_id, input_tables):
        raise NotImplementedError


    def generate_expected_table(self, tensor_descriptors, input_tables):
        """ Generate the complete table of expected results """
        # tensor_descriptors unpacking
        (input_tensor_descriptor_list, output_tensor_descriptor_list) = tensor_descriptors
        ## output values required to check results are stored in output table
        NUM_INPUT_ARRAY = len(input_tables)


        expected_tables = []
        # generating expected value table
        for output_id, output_td in enumerate(output_tensor_descriptor_list):
            table_size = output_td.get_bounding_size()

            def expected_value_gen(element_id):
                """ generate a full row of expected values using inputs
                    from input_tables """
                exact_value = self.tensor_element_emulate(tensor_descriptors, output_id, element_id, input_tables)
                output_values = self.accuracy.get_output_check_value(exact_value)
                return output_values

            # FIXME: num_output_value should depend on accuracy requirement
            # for a specific output tensor
            num_output_value = self.accuracy.get_num_output_value()
            expected_table = generate_2d_table(
                table_size, num_output_value,
                output_td.scalar_format,
                "expected_table",
                value_gen=expected_value_gen
            )
            expected_tables.append(expected_table)
        return expected_tables