diff --git a/.github/workflows/smoke_test.yml b/.github/workflows/smoke_test.yml index 127513ee3..f85a71fa8 100644 --- a/.github/workflows/smoke_test.yml +++ b/.github/workflows/smoke_test.yml @@ -52,45 +52,6 @@ jobs: path: tests/smoke_test/ac_smoke/report.html retention-days: 1 - smoke_test_benchmark_caffe: - strategy: - matrix: - os: [ubuntu-22.04] - runs-on: ${{ matrix.os }} - steps: - - name: Code checkout - uses: actions/checkout@v3 - - - name: Setting up miniconda - uses: conda-incubator/setup-miniconda@v2 - with: - miniconda-version: "latest" - auto-update-conda: true - activate-environment: caffe_env_3.7 - python-version: 3.7 - - - name: Setting up Python dependencies and Caffe - shell: bash -el {0} - run: | - python -m pip install --upgrade pip - python -m pip install -r requirements_ci.txt - python -m pip install openvino-dev - python -m pip install apache-tvm - conda install -y caffe - - - name: Run smoke test for inference benchmark - shell: bash -el {0} - run: | - cd tests/smoke_test/benchmark_smoke && python -m pytest test_benchmark_smoke.py -m="caffe" - - - name: Upload benchmark caffe artifacts - uses: actions/upload-artifact@v4 - if: always() - with: - name: benchmark_report_caffe - path: tests/smoke_test/benchmark_smoke/report.html - retention-days: 1 - smoke_test_quantization: strategy: matrix: diff --git a/src/benchmark/frameworks/tvm/tvm_parameters_parser.py b/src/benchmark/frameworks/tvm/tvm_parameters_parser.py index 135cae2fb..1af961588 100644 --- a/src/benchmark/frameworks/tvm/tvm_parameters_parser.py +++ b/src/benchmark/frameworks/tvm/tvm_parameters_parser.py @@ -16,6 +16,7 @@ def parse_parameters(self, curr_test): CONFIG_FRAMEWORK_DEPENDENT_CHANNEL_SWAP_TAG = 'ChannelSwap' CONFIG_FRAMEWORK_DEPENDENT_LAYOUT_TAG = 'Layout' CONFIG_FRAMEWORK_DEPENDENT_HIGH_LEVEL_API = 'HighLevelAPI' + CONFIG_FRAMEWORK_DEPENDENT_FEW_SHOT_TUNING = 'FewShotTuning' dep_parameters_tag = curr_test.getElementsByTagName(CONFIG_FRAMEWORK_DEPENDENT_TAG)[0] @@ -42,6 +43,10 @@ def parse_parameters(self, curr_test): _high_level_api = dep_parameters_tag.getElementsByTagName( CONFIG_FRAMEWORK_DEPENDENT_HIGH_LEVEL_API)[0].firstChild + _few_shot_tuning_tags = dep_parameters_tag.getElementsByTagName( + CONFIG_FRAMEWORK_DEPENDENT_FEW_SHOT_TUNING) + _few_shot_tuning = _few_shot_tuning_tags[0].firstChild if _few_shot_tuning_tags else None + return TVMParameters( framework=_framework.data if _framework else None, input_name=_input_name.data if _input_name else None, @@ -54,6 +59,7 @@ def parse_parameters(self, curr_test): layout=_layout.data if _layout else None, target=_target.data if _target else None, high_level_api=_high_level_api.data if _high_level_api else None, + few_shot_tuning=_few_shot_tuning.data if _few_shot_tuning else None, ) @@ -61,7 +67,7 @@ class TVMParameters(FrameworkParameters): def __init__(self, framework, input_name, input_shape, normalize, mean, std, channel_swap, optimization_level, layout, target, - high_level_api): + high_level_api, few_shot_tuning=None): self.framework = None self.input_name = None self.input_shape = None @@ -73,6 +79,7 @@ def __init__(self, framework, input_name, input_shape, self.layout = None self.target = 'llvm' self.high_level_api = None + self.few_shot_tuning = None if self._framework_is_correct(framework): self.framework = framework @@ -96,6 +103,8 @@ def __init__(self, framework, input_name, input_shape, self.target = target if self._parameter_is_not_none(high_level_api): self.high_level_api = high_level_api + if self._parameter_is_not_none(few_shot_tuning): + self.few_shot_tuning = few_shot_tuning @staticmethod def _framework_is_correct(framework): diff --git a/src/benchmark/frameworks/tvm/tvm_process.py b/src/benchmark/frameworks/tvm/tvm_process.py index 7c6df753b..21f336103 100644 --- a/src/benchmark/frameworks/tvm/tvm_process.py +++ b/src/benchmark/frameworks/tvm/tvm_process.py @@ -77,13 +77,19 @@ def _fill_command_line(self): common_params, '--opt_level', opt_level) target = self._test.dep_parameters.target - common_params = TVMProcess._add_optional_argument_to_cmd_line( - common_params, '--target', target) + if target: + common_params = TVMProcess._add_argument_to_cmd_line( + common_params, '--target', f'"{target}"') high_level_api = self._test.dep_parameters.high_level_api common_params = TVMProcess._add_optional_argument_to_cmd_line( common_params, '--high_level_api', high_level_api) + few_shot_tuning = self._test.dep_parameters.few_shot_tuning + if few_shot_tuning == 'True': + common_params = TVMProcess._add_flag_to_cmd_line( + common_params, '--few_shot_tuning') + return f'{common_params}' diff --git a/src/benchmark/frameworks/tvm/tvm_test.py b/src/benchmark/frameworks/tvm/tvm_test.py index cabb43732..7d918b51e 100644 --- a/src/benchmark/frameworks/tvm/tvm_test.py +++ b/src/benchmark/frameworks/tvm/tvm_test.py @@ -14,6 +14,8 @@ def get_report(self, process): parameters.update({'Framework': self.dep_parameters.framework}) parameters.update({'HighLevelAPI': self.dep_parameters.high_level_api}) parameters.update({'Optimization level': self.dep_parameters.optimization_level}) + if self.dep_parameters.few_shot_tuning == 'True': + parameters.update({'FewShotTuning': 'True'}) other_param = self._get_optional_parameters_string(parameters) report_res = { diff --git a/src/configs/README.md b/src/configs/README.md index 068db32c7..389031123 100644 --- a/src/configs/README.md +++ b/src/configs/README.md @@ -357,6 +357,9 @@ - `HighLevelAPI` - тег, необязательный для заполнения. Определяет используемое высокоуровневое API: `Relay`, `RelayVM` или `RelaxVM`. По умолчанию задается значение `Relay`. - `OptimizationLevel` - тег, необязательный для заполнения. Определяет уровень оптимизаций для графа вычислений, которые ускоряют инференс. По умолчанию оптимизации не применяются. + - `FewShotTuning` - тег, необязательный для заполнения. Применяет FewShotTuning — быструю + настройку расписаний ядер для многопоточного вывода на CPU. Допустимые значения: `True`, `False`. + Применимо только для `RelaxVM`. По умолчанию не установлен. - `Framework` - тег, обязательный для заполнения. Определяет фреймворк, модели которого будут запущены средствами Apache TVM. По умолчанию задается фреймворк `TVM`. @@ -781,9 +784,10 @@ 0.229 0.224 0.225 NCHW - llvm + llvm -num-cores=4 RelaxVM 3 + True ``` diff --git a/src/configs/benchmark_configuration_file_template.xml b/src/configs/benchmark_configuration_file_template.xml index cdaa62703..37ef078f1 100644 --- a/src/configs/benchmark_configuration_file_template.xml +++ b/src/configs/benchmark_configuration_file_template.xml @@ -449,6 +449,7 @@ + diff --git a/src/inference/README.md b/src/inference/README.md index 3157f765c..4e21978be 100644 --- a/src/inference/README.md +++ b/src/inference/README.md @@ -961,6 +961,9 @@ inference_tvm.py - `-ol / --opt_level` - параметр, определяющий уровень оптимизации графа вычислений нейронной сети для ускорения инференса. По умолчанию оптимизации не применяются. +- `--few_shot_tuning` - применить FewShotTuning — быструю настройку расписаний + ядер для многопоточного вывода на CPU. Применимо только для `RelaxVM`. + По умолчанию не установлен. - `--raw_output` - работа скрипта без логов. По умолчанию не установлен. Аргументы, необходимые для инференса моделей MXNet с использованием Apache TVM: diff --git a/src/inference/inference_tvm.py b/src/inference/inference_tvm.py index 12a504ba9..eaf226c4c 100644 --- a/src/inference/inference_tvm.py +++ b/src/inference/inference_tvm.py @@ -172,6 +172,11 @@ def cli_argument_parser(): default='Relay', type=str, dest='high_level_api') + parser.add_argument('--few_shot_tuning', + help='Apply FewShotTuning scheduling pass for RelaxVM ' + 'to enable multi-threaded CPU inference.', + action='store_true', + dest='few_shot_tuning') parser.add_argument('--raw_output', help='Raw output without logs.', default=False, diff --git a/src/inference/tvm_auxiliary.py b/src/inference/tvm_auxiliary.py index 39e922ad1..900b74cec 100644 --- a/src/inference/tvm_auxiliary.py +++ b/src/inference/tvm_auxiliary.py @@ -182,6 +182,7 @@ def create_dict_for_converter(args): 'module': args.module, 'high_level_api': args.high_level_api, 'source_framework': args.source_framework, + 'few_shot_tuning': getattr(args, 'few_shot_tuning', False), } return dictionary diff --git a/src/model_converters/tvm_converter/README.md b/src/model_converters/tvm_converter/README.md index d35c5cac1..7875cafd3 100644 --- a/src/model_converters/tvm_converter/README.md +++ b/src/model_converters/tvm_converter/README.md @@ -45,6 +45,10 @@ This script converts model from `` to the TVM format. - `-d / --device` is a target device for inference. It equals `CPU` by default. - `-op / --output_dir` is path to save the model. +- `--high_level_api` is a high level API: `Relay`, `RelayVM`, `RelaxVM`. + It equals `Relay` by default. +- `--few_shot_tuning` applies FewShotTuning scheduling pass for `RelaxVM` + to enable multi-threaded CPU inference. Disabled by default. ### Examples of usage @@ -83,6 +87,8 @@ for the Relay API or to the `.so`+`.ro` format for the VirtualMachine API. - `-t / --target` is target device information, for example `llvm` for CPU. - `--opt_level` is the optimization level of the task extractions. - `--high_level_api` is a high level API: `Relay`, `RelayVM`, `RelaxVM`. +- `--few_shot_tuning` applies FewShotTuning scheduling pass for `RelaxVM` + to enable multi-threaded CPU inference. Disabled by default. - `--lib_name` is a file name to save compiled model. - `-op / --output_dir` is a path to save the model. @@ -95,5 +101,7 @@ python3 ./tvm_compiler.py -m efficientnet-b0.json -p efficientnet-b0.params \ ```sh python3 ./tvm_compiler.py -m resnet50.json -p resnet50.params \ - -t llvm --opt_level 1 --lib_name resnet50.so + -t "llvm -num-cores=16" --opt_level 3 \ + --high_level_api RelaxVM --few_shot_tuning \ + --lib_name resnet50.so ``` diff --git a/src/model_converters/tvm_converter/tvm_auxiliary/converter.py b/src/model_converters/tvm_converter/tvm_auxiliary/converter.py index b2f9a2d68..6124e506d 100644 --- a/src/model_converters/tvm_converter/tvm_auxiliary/converter.py +++ b/src/model_converters/tvm_converter/tvm_auxiliary/converter.py @@ -22,6 +22,7 @@ def __init__(self, args): self.target_str = args.get('target', None) self.module = args.get('module', None) self.high_level_api = args.get('high_level_api', None) + self.few_shot_tuning = args.get('few_shot_tuning', False) self.output_dir = args.get('output_dir', None) self.lib_name = args.get('lib_name', None) @@ -112,8 +113,10 @@ def save_tvm_model(self): fo.write(self.tvm.ir.save_json(self.mod)) def get_graph_module_from_lib(self, lib): - if self.high_level_api in ['Relay', 'RelayVM']: + if self.high_level_api == 'Relay': return self.__get_graph_module_from_relay_lib(lib) + elif self.high_level_api == 'RelayVM': + return self.__get_graph_module_from_relay_vm_lib(lib) elif self.high_level_api == 'RelaxVM': return self.__get_graph_module_from_relax_vm_lib(lib) else: @@ -124,6 +127,11 @@ def __get_graph_module_from_relay_lib(self, lib): self.graph = self.graph_executor.GraphModule(lib['default'](dev)) return self.graph + def __get_graph_module_from_relay_vm_lib(self, lib): + _, dev = self._get_target_device() + des_vm = self.tvm.runtime.vm.VirtualMachine(lib, dev) + return des_vm + def __get_graph_module_from_relax_vm_lib(self, lib): _, dev = self._get_target_device() des_vm = self.tvm.relax.VirtualMachine(lib, dev) @@ -155,9 +163,46 @@ def __get_lib_from_relay_vm(self, target, model): code, lib = executable.save() return code, lib + def _build_relax_vm_pipeline(self): + relax_transform = self.tvm.relax.transform + + passes = [ + relax_transform.LegalizeOps(), + relax_transform.AnnotateTIROpPattern(), + relax_transform.FoldConstant(), + relax_transform.FuseOps(), + relax_transform.FuseTIR(), + ] + + if self.few_shot_tuning: + self.log.info('Applying FewShotTuning for Relax VM') + passes.append(relax_transform.FewShotTuning(valid_count=1, benchmark=False)) + + passes.extend([ + relax_transform.RewriteDataflowReshape(), + relax_transform.ToNonDataflow(), + relax_transform.RemovePurityChecking(), + relax_transform.CallTIRRewrite(), + relax_transform.StaticPlanBlockMemory(), + relax_transform.LowerAllocTensor(), + relax_transform.KillAfterLastUse(), + relax_transform.LowerRuntimeBuiltin(), + relax_transform.ComputePrimValue(), + relax_transform.VMShapeLower(), + relax_transform.AttachGlobalSymbol(), + ]) + + @self.tvm.transform.module_pass(opt_level=0) + def pipeline(mod, _ctx): + return self.tvm.transform.Sequential(passes)(mod) + + return pipeline + def __get_lib_from_relax_vm(self, target, model): + pipeline = self._build_relax_vm_pipeline() with self.tvm.transform.PassContext(opt_level=self.opt_level): - lib = self.tvm.relax.build(model[0], target=target, params=model[1]) + lib = self.tvm.relax.build(model[0], target=target, params=model[1], + pipeline=pipeline) return [lib] def export_lib(self): @@ -187,7 +232,7 @@ def __get_graph_module_from_relay(self, mod, params, target, dev): def __get_graph_module_from_relay_vm(self, mod, params, target, dev): vm = self.tvm.runtime.vm rly_vm = self.tvm.relay.vm - if self.mod_type == 'so' and self.params_type == 'ro': + if self.mod_type in ['so', 'tar'] and self.params_type == 'ro': executable = vm.Executable.load_exec(params, mod) else: with self.tvm.transform.PassContext(opt_level=self.opt_level): @@ -196,8 +241,10 @@ def __get_graph_module_from_relay_vm(self, mod, params, target, dev): return des_vm def __get_graph_module_from_relax_vm(self, mod, params, target, dev): + pipeline = self._build_relax_vm_pipeline() with self.tvm.transform.PassContext(opt_level=self.opt_level): - executable = self.tvm.relax.build(mod, target=target, params=params) + executable = self.tvm.relax.build(mod, target=target, params=params, + pipeline=pipeline) des_vm = self.tvm.relax.VirtualMachine(executable, dev) return des_vm diff --git a/src/model_converters/tvm_converter/tvm_compiler.py b/src/model_converters/tvm_converter/tvm_compiler.py index 9ffd71f6c..98844ac58 100644 --- a/src/model_converters/tvm_converter/tvm_compiler.py +++ b/src/model_converters/tvm_converter/tvm_compiler.py @@ -47,6 +47,11 @@ def cli_argument_parser(): default='Relay', type=str, dest='high_level_api') + parser.add_argument('--few_shot_tuning', + help='Apply FewShotTuning scheduling pass for RelaxVM ' + 'to enable multi-threaded CPU inference.', + action='store_true', + dest='few_shot_tuning') args = parser.parse_args() return args @@ -61,6 +66,7 @@ def create_dict_for_compilation(args): 'lib_name': args.lib_name, 'output_dir': args.output_dir, 'high_level_api': args.high_level_api, + 'few_shot_tuning': args.few_shot_tuning, } return dictionary diff --git a/src/model_converters/tvm_converter/tvm_converter.py b/src/model_converters/tvm_converter/tvm_converter.py index 86dea2c6d..7828cdeb8 100644 --- a/src/model_converters/tvm_converter/tvm_converter.py +++ b/src/model_converters/tvm_converter/tvm_converter.py @@ -71,6 +71,11 @@ def cli_argument_parser(): default='Relay', type=str, dest='high_level_api') + parser.add_argument('--few_shot_tuning', + help='Apply FewShotTuning scheduling pass for RelaxVM ' + 'to enable multi-threaded CPU inference.', + action='store_true', + dest='few_shot_tuning') args = parser.parse_args() return args @@ -87,6 +92,7 @@ def create_dict_for_converter(args): 'output_dir': args.output_dir, 'source_framework': args.source_framework, 'high_level_api': args.high_level_api, + 'few_shot_tuning': args.few_shot_tuning, } return dictionary diff --git a/tests/smoke_test/benchmark_smoke/conftest.py b/tests/smoke_test/benchmark_smoke/conftest.py index f2794f9ed..c890c0ea5 100644 --- a/tests/smoke_test/benchmark_smoke/conftest.py +++ b/tests/smoke_test/benchmark_smoke/conftest.py @@ -19,7 +19,7 @@ 'person-detection-action-recognition-0006', 'person-detection-raisinghand-recognition-0001', 'person-detection-action-recognition-teacher-0002', 'yolo-v2-ava-0001', 'yolo-v2-tiny-ava-0001', 'yolo-v2-tf', 'yolo-v3-tf'] -DL_CAFFE_MODELS = ['googlenet-v1'] +DL_CAFFE_MODELS = [] def pytest_addoption(parser): @@ -215,10 +215,8 @@ def pytest_generate_tests(metafunc): param_list.append(smoke_test_params(**params)) id_list.append(config_file.stem) - # Mark Caffe tests + # Mark tests for i, test_param in enumerate(param_list): - if test_param.config_name in ['googlenet-v1_Caffe', 'googlenet-v1_TVM_Caffe', 'googlenet-v1_TVM']: - param_list[i] = pytest.param(test_param, marks=pytest.mark.caffe) if test_param.config_name in ['dgl']: param_list[i] = pytest.param(test_param) diff --git a/tests/smoke_test/configs/dl_models/googlenet-v1_Caffe.xml b/tests/smoke_test/configs/dl_models/googlenet-v1_Caffe.xml deleted file mode 100644 index e1979c1f5..000000000 --- a/tests/smoke_test/configs/dl_models/googlenet-v1_Caffe.xml +++ /dev/null @@ -1,32 +0,0 @@ - - - - - classification - googlenet-v1 - FP32 - Caffe - ../models_dir/public/googlenet-v1/googlenet-v1.prototxt - ../models_dir/public/googlenet-v1/googlenet-v1.caffemodel - - - Data - ../test_images/black_square.jpg - - - Caffe - 1 - CPU - 5 - 1 - python3 - - - - 104.0 117.0 123.0 - - - - - - \ No newline at end of file diff --git a/tests/smoke_test/configs/dl_models/googlenet-v1_TVM.xml b/tests/smoke_test/configs/dl_models/googlenet-v1_TVM.xml deleted file mode 100644 index c0622a31a..000000000 --- a/tests/smoke_test/configs/dl_models/googlenet-v1_TVM.xml +++ /dev/null @@ -1,38 +0,0 @@ - - - - - classification - googlenet-v1 - FP32 - TVM - ../models_dir/public/googlenet-v1/googlenet-v1.json - ../models_dir/public/googlenet-v1/googlenet-v1.params - - - Data - ../test_images/black_square.jpg - - - TVM - 1 - CPU - 5 - 1 - python3 - - - data - TVM - 1 3 224 224 - False - 0.485 0.456 0.406 - 0.229 0.224 0.225 - - NCHW - llvm - Relay - 0 - - - \ No newline at end of file diff --git a/tests/smoke_test/configs/dl_models/googlenet-v1_TVM_Caffe.xml b/tests/smoke_test/configs/dl_models/googlenet-v1_TVM_Caffe.xml deleted file mode 100644 index 50b0b63d5..000000000 --- a/tests/smoke_test/configs/dl_models/googlenet-v1_TVM_Caffe.xml +++ /dev/null @@ -1,38 +0,0 @@ - - - - - classification - googlenet-v1 - FP32 - TVM - ../models_dir/public/googlenet-v1/googlenet-v1.prototxt - ../models_dir/public/googlenet-v1/googlenet-v1.caffemodel - - - Data - ../test_images/black_square.jpg - - - TVM - 1 - CPU - 5 - 1 - python3 - - - data - Caffe - 1 3 224 224 - False - 0.485 0.456 0.406 - 0.229 0.224 0.225 - - NCHW - Relay - llvm - 0 - - - \ No newline at end of file