diff --git a/.github/workflows/smoke_test.yml b/.github/workflows/smoke_test.yml
index 127513ee3..f85a71fa8 100644
--- a/.github/workflows/smoke_test.yml
+++ b/.github/workflows/smoke_test.yml
@@ -52,45 +52,6 @@ jobs:
path: tests/smoke_test/ac_smoke/report.html
retention-days: 1
- smoke_test_benchmark_caffe:
- strategy:
- matrix:
- os: [ubuntu-22.04]
- runs-on: ${{ matrix.os }}
- steps:
- - name: Code checkout
- uses: actions/checkout@v3
-
- - name: Setting up miniconda
- uses: conda-incubator/setup-miniconda@v2
- with:
- miniconda-version: "latest"
- auto-update-conda: true
- activate-environment: caffe_env_3.7
- python-version: 3.7
-
- - name: Setting up Python dependencies and Caffe
- shell: bash -el {0}
- run: |
- python -m pip install --upgrade pip
- python -m pip install -r requirements_ci.txt
- python -m pip install openvino-dev
- python -m pip install apache-tvm
- conda install -y caffe
-
- - name: Run smoke test for inference benchmark
- shell: bash -el {0}
- run: |
- cd tests/smoke_test/benchmark_smoke && python -m pytest test_benchmark_smoke.py -m="caffe"
-
- - name: Upload benchmark caffe artifacts
- uses: actions/upload-artifact@v4
- if: always()
- with:
- name: benchmark_report_caffe
- path: tests/smoke_test/benchmark_smoke/report.html
- retention-days: 1
-
smoke_test_quantization:
strategy:
matrix:
diff --git a/src/benchmark/frameworks/tvm/tvm_parameters_parser.py b/src/benchmark/frameworks/tvm/tvm_parameters_parser.py
index 135cae2fb..1af961588 100644
--- a/src/benchmark/frameworks/tvm/tvm_parameters_parser.py
+++ b/src/benchmark/frameworks/tvm/tvm_parameters_parser.py
@@ -16,6 +16,7 @@ def parse_parameters(self, curr_test):
CONFIG_FRAMEWORK_DEPENDENT_CHANNEL_SWAP_TAG = 'ChannelSwap'
CONFIG_FRAMEWORK_DEPENDENT_LAYOUT_TAG = 'Layout'
CONFIG_FRAMEWORK_DEPENDENT_HIGH_LEVEL_API = 'HighLevelAPI'
+ CONFIG_FRAMEWORK_DEPENDENT_FEW_SHOT_TUNING = 'FewShotTuning'
dep_parameters_tag = curr_test.getElementsByTagName(CONFIG_FRAMEWORK_DEPENDENT_TAG)[0]
@@ -42,6 +43,10 @@ def parse_parameters(self, curr_test):
_high_level_api = dep_parameters_tag.getElementsByTagName(
CONFIG_FRAMEWORK_DEPENDENT_HIGH_LEVEL_API)[0].firstChild
+ _few_shot_tuning_tags = dep_parameters_tag.getElementsByTagName(
+ CONFIG_FRAMEWORK_DEPENDENT_FEW_SHOT_TUNING)
+ _few_shot_tuning = _few_shot_tuning_tags[0].firstChild if _few_shot_tuning_tags else None
+
return TVMParameters(
framework=_framework.data if _framework else None,
input_name=_input_name.data if _input_name else None,
@@ -54,6 +59,7 @@ def parse_parameters(self, curr_test):
layout=_layout.data if _layout else None,
target=_target.data if _target else None,
high_level_api=_high_level_api.data if _high_level_api else None,
+ few_shot_tuning=_few_shot_tuning.data if _few_shot_tuning else None,
)
@@ -61,7 +67,7 @@ class TVMParameters(FrameworkParameters):
def __init__(self, framework, input_name, input_shape,
normalize, mean, std, channel_swap,
optimization_level, layout, target,
- high_level_api):
+ high_level_api, few_shot_tuning=None):
self.framework = None
self.input_name = None
self.input_shape = None
@@ -73,6 +79,7 @@ def __init__(self, framework, input_name, input_shape,
self.layout = None
self.target = 'llvm'
self.high_level_api = None
+ self.few_shot_tuning = None
if self._framework_is_correct(framework):
self.framework = framework
@@ -96,6 +103,8 @@ def __init__(self, framework, input_name, input_shape,
self.target = target
if self._parameter_is_not_none(high_level_api):
self.high_level_api = high_level_api
+ if self._parameter_is_not_none(few_shot_tuning):
+ self.few_shot_tuning = few_shot_tuning
@staticmethod
def _framework_is_correct(framework):
diff --git a/src/benchmark/frameworks/tvm/tvm_process.py b/src/benchmark/frameworks/tvm/tvm_process.py
index 7c6df753b..21f336103 100644
--- a/src/benchmark/frameworks/tvm/tvm_process.py
+++ b/src/benchmark/frameworks/tvm/tvm_process.py
@@ -77,13 +77,19 @@ def _fill_command_line(self):
common_params, '--opt_level', opt_level)
target = self._test.dep_parameters.target
- common_params = TVMProcess._add_optional_argument_to_cmd_line(
- common_params, '--target', target)
+ if target:
+ common_params = TVMProcess._add_argument_to_cmd_line(
+ common_params, '--target', f'"{target}"')
high_level_api = self._test.dep_parameters.high_level_api
common_params = TVMProcess._add_optional_argument_to_cmd_line(
common_params, '--high_level_api', high_level_api)
+ few_shot_tuning = self._test.dep_parameters.few_shot_tuning
+ if few_shot_tuning == 'True':
+ common_params = TVMProcess._add_flag_to_cmd_line(
+ common_params, '--few_shot_tuning')
+
return f'{common_params}'
diff --git a/src/benchmark/frameworks/tvm/tvm_test.py b/src/benchmark/frameworks/tvm/tvm_test.py
index cabb43732..7d918b51e 100644
--- a/src/benchmark/frameworks/tvm/tvm_test.py
+++ b/src/benchmark/frameworks/tvm/tvm_test.py
@@ -14,6 +14,8 @@ def get_report(self, process):
parameters.update({'Framework': self.dep_parameters.framework})
parameters.update({'HighLevelAPI': self.dep_parameters.high_level_api})
parameters.update({'Optimization level': self.dep_parameters.optimization_level})
+ if self.dep_parameters.few_shot_tuning == 'True':
+ parameters.update({'FewShotTuning': 'True'})
other_param = self._get_optional_parameters_string(parameters)
report_res = {
diff --git a/src/configs/README.md b/src/configs/README.md
index 068db32c7..389031123 100644
--- a/src/configs/README.md
+++ b/src/configs/README.md
@@ -357,6 +357,9 @@
- `HighLevelAPI` - тег, необязательный для заполнения. Определяет используемое высокоуровневое API: `Relay`, `RelayVM` или `RelaxVM`. По умолчанию задается значение `Relay`.
- `OptimizationLevel` - тег, необязательный для заполнения. Определяет уровень оптимизаций для
графа вычислений, которые ускоряют инференс. По умолчанию оптимизации не применяются.
+ - `FewShotTuning` - тег, необязательный для заполнения. Применяет FewShotTuning — быструю
+ настройку расписаний ядер для многопоточного вывода на CPU. Допустимые значения: `True`, `False`.
+ Применимо только для `RelaxVM`. По умолчанию не установлен.
- `Framework` - тег, обязательный для заполнения. Определяет фреймворк, модели которого будут
запущены средствами Apache TVM. По умолчанию задается фреймворк `TVM`.
@@ -781,9 +784,10 @@
0.229 0.224 0.225
NCHW
- llvm
+ llvm -num-cores=4
RelaxVM
3
+ True
```
diff --git a/src/configs/benchmark_configuration_file_template.xml b/src/configs/benchmark_configuration_file_template.xml
index cdaa62703..37ef078f1 100644
--- a/src/configs/benchmark_configuration_file_template.xml
+++ b/src/configs/benchmark_configuration_file_template.xml
@@ -449,6 +449,7 @@
+
diff --git a/src/inference/README.md b/src/inference/README.md
index 3157f765c..4e21978be 100644
--- a/src/inference/README.md
+++ b/src/inference/README.md
@@ -961,6 +961,9 @@ inference_tvm.py
- `-ol / --opt_level` - параметр, определяющий уровень оптимизации
графа вычислений нейронной сети для ускорения инференса. По умолчанию
оптимизации не применяются.
+- `--few_shot_tuning` - применить FewShotTuning — быструю настройку расписаний
+ ядер для многопоточного вывода на CPU. Применимо только для `RelaxVM`.
+ По умолчанию не установлен.
- `--raw_output` - работа скрипта без логов. По умолчанию не установлен.
Аргументы, необходимые для инференса моделей MXNet с использованием Apache TVM:
diff --git a/src/inference/inference_tvm.py b/src/inference/inference_tvm.py
index 12a504ba9..eaf226c4c 100644
--- a/src/inference/inference_tvm.py
+++ b/src/inference/inference_tvm.py
@@ -172,6 +172,11 @@ def cli_argument_parser():
default='Relay',
type=str,
dest='high_level_api')
+ parser.add_argument('--few_shot_tuning',
+ help='Apply FewShotTuning scheduling pass for RelaxVM '
+ 'to enable multi-threaded CPU inference.',
+ action='store_true',
+ dest='few_shot_tuning')
parser.add_argument('--raw_output',
help='Raw output without logs.',
default=False,
diff --git a/src/inference/tvm_auxiliary.py b/src/inference/tvm_auxiliary.py
index 39e922ad1..900b74cec 100644
--- a/src/inference/tvm_auxiliary.py
+++ b/src/inference/tvm_auxiliary.py
@@ -182,6 +182,7 @@ def create_dict_for_converter(args):
'module': args.module,
'high_level_api': args.high_level_api,
'source_framework': args.source_framework,
+ 'few_shot_tuning': getattr(args, 'few_shot_tuning', False),
}
return dictionary
diff --git a/src/model_converters/tvm_converter/README.md b/src/model_converters/tvm_converter/README.md
index d35c5cac1..7875cafd3 100644
--- a/src/model_converters/tvm_converter/README.md
+++ b/src/model_converters/tvm_converter/README.md
@@ -45,6 +45,10 @@ This script converts model from `` to the TVM format.
- `-d / --device` is a target device for inference. It equals `CPU`
by default.
- `-op / --output_dir` is path to save the model.
+- `--high_level_api` is a high level API: `Relay`, `RelayVM`, `RelaxVM`.
+ It equals `Relay` by default.
+- `--few_shot_tuning` applies the FewShotTuning scheduling pass for `RelaxVM`
+ to enable multi-threaded CPU inference. Disabled by default.
### Examples of usage
@@ -83,6 +87,8 @@ for the Relay API or to the `.so`+`.ro` format for the VirtualMachine API.
- `-t / --target` is target device information, for example `llvm` for CPU.
- `--opt_level` is the optimization level of the task extractions.
- `--high_level_api` is a high level API: `Relay`, `RelayVM`, `RelaxVM`.
+- `--few_shot_tuning` applies the FewShotTuning scheduling pass for `RelaxVM`
+ to enable multi-threaded CPU inference. Disabled by default.
- `--lib_name` is a file name to save compiled model.
- `-op / --output_dir` is a path to save the model.
@@ -95,5 +101,7 @@ python3 ./tvm_compiler.py -m efficientnet-b0.json -p efficientnet-b0.params \
```sh
python3 ./tvm_compiler.py -m resnet50.json -p resnet50.params \
- -t llvm --opt_level 1 --lib_name resnet50.so
+ -t "llvm -num-cores=16" --opt_level 3 \
+ --high_level_api RelaxVM --few_shot_tuning \
+ --lib_name resnet50.so
```
diff --git a/src/model_converters/tvm_converter/tvm_auxiliary/converter.py b/src/model_converters/tvm_converter/tvm_auxiliary/converter.py
index b2f9a2d68..6124e506d 100644
--- a/src/model_converters/tvm_converter/tvm_auxiliary/converter.py
+++ b/src/model_converters/tvm_converter/tvm_auxiliary/converter.py
@@ -22,6 +22,7 @@ def __init__(self, args):
self.target_str = args.get('target', None)
self.module = args.get('module', None)
self.high_level_api = args.get('high_level_api', None)
+ self.few_shot_tuning = args.get('few_shot_tuning', False)
self.output_dir = args.get('output_dir', None)
self.lib_name = args.get('lib_name', None)
@@ -112,8 +113,10 @@ def save_tvm_model(self):
fo.write(self.tvm.ir.save_json(self.mod))
def get_graph_module_from_lib(self, lib):
- if self.high_level_api in ['Relay', 'RelayVM']:
+ if self.high_level_api == 'Relay':
return self.__get_graph_module_from_relay_lib(lib)
+ elif self.high_level_api == 'RelayVM':
+ return self.__get_graph_module_from_relay_vm_lib(lib)
elif self.high_level_api == 'RelaxVM':
return self.__get_graph_module_from_relax_vm_lib(lib)
else:
@@ -124,6 +127,11 @@ def __get_graph_module_from_relay_lib(self, lib):
self.graph = self.graph_executor.GraphModule(lib['default'](dev))
return self.graph
+ def __get_graph_module_from_relay_vm_lib(self, lib):
+     """Wrap ``lib`` in a ``tvm.runtime.vm.VirtualMachine``.
+
+     The device is the second item returned by ``self._get_target_device()``;
+     the first item (the target) is not used here.
+     """
+     _unused_target, device = self._get_target_device()
+     return self.tvm.runtime.vm.VirtualMachine(lib, device)
+
def __get_graph_module_from_relax_vm_lib(self, lib):
_, dev = self._get_target_device()
des_vm = self.tvm.relax.VirtualMachine(lib, dev)
@@ -155,9 +163,46 @@ def __get_lib_from_relay_vm(self, target, model):
code, lib = executable.save()
return code, lib
+ def _build_relax_vm_pipeline(self):
+     """Assemble the module pass used as the ``pipeline`` for ``relax.build``.
+
+     The passes are listed explicitly, in order. When ``self.few_shot_tuning``
+     is truthy, ``FewShotTuning(valid_count=1, benchmark=False)`` is inserted
+     right after ``FuseTIR`` and before ``RewriteDataflowReshape``.
+
+     Returns:
+         A module pass (registered with ``opt_level=0``) that runs the whole
+         list through ``tvm.transform.Sequential`` on the given module.
+     """
+     rt = self.tvm.relax.transform
+
+     # Legalization and fusion stage; FewShotTuning (if any) goes after it.
+     passes = [
+         rt.LegalizeOps(),
+         rt.AnnotateTIROpPattern(),
+         rt.FoldConstant(),
+         rt.FuseOps(),
+         rt.FuseTIR(),
+     ]
+
+     if self.few_shot_tuning:
+         self.log.info('Applying FewShotTuning for Relax VM')
+         passes.append(rt.FewShotTuning(valid_count=1, benchmark=False))
+
+     # Lowering stage; the order of these passes is kept exactly as listed.
+     passes += [
+         rt.RewriteDataflowReshape(),
+         rt.ToNonDataflow(),
+         rt.RemovePurityChecking(),
+         rt.CallTIRRewrite(),
+         rt.StaticPlanBlockMemory(),
+         rt.LowerAllocTensor(),
+         rt.KillAfterLastUse(),
+         rt.LowerRuntimeBuiltin(),
+         rt.ComputePrimValue(),
+         rt.VMShapeLower(),
+         rt.AttachGlobalSymbol(),
+     ]
+
+     # The inner function keeps the name ``pipeline``: it is the callable
+     # handed to module_pass, exactly as in the decorator form.
+     def pipeline(mod, _ctx):
+         return self.tvm.transform.Sequential(passes)(mod)
+
+     return self.tvm.transform.module_pass(opt_level=0)(pipeline)
+
def __get_lib_from_relax_vm(self, target, model):
+ # Builds model[0] with model[1] as params via relax.build, using the explicit
+ # pass pipeline from _build_relax_vm_pipeline; returns a one-element list.
+ pipeline = self._build_relax_vm_pipeline()
with self.tvm.transform.PassContext(opt_level=self.opt_level):
- lib = self.tvm.relax.build(model[0], target=target, params=model[1])
+ lib = self.tvm.relax.build(model[0], target=target, params=model[1],
+ pipeline=pipeline)
return [lib]
def export_lib(self):
@@ -187,7 +232,7 @@ def __get_graph_module_from_relay(self, mod, params, target, dev):
def __get_graph_module_from_relay_vm(self, mod, params, target, dev):
vm = self.tvm.runtime.vm
rly_vm = self.tvm.relay.vm
- if self.mod_type == 'so' and self.params_type == 'ro':
+ if self.mod_type in ['so', 'tar'] and self.params_type == 'ro':
executable = vm.Executable.load_exec(params, mod)
else:
with self.tvm.transform.PassContext(opt_level=self.opt_level):
@@ -196,8 +241,10 @@ def __get_graph_module_from_relay_vm(self, mod, params, target, dev):
return des_vm
def __get_graph_module_from_relax_vm(self, mod, params, target, dev):
+ # Builds `mod` via relax.build with the explicit pass pipeline from
+ # _build_relax_vm_pipeline, then wraps the result in a relax.VirtualMachine on `dev`.
+ pipeline = self._build_relax_vm_pipeline()
with self.tvm.transform.PassContext(opt_level=self.opt_level):
- executable = self.tvm.relax.build(mod, target=target, params=params)
+ executable = self.tvm.relax.build(mod, target=target, params=params,
+ pipeline=pipeline)
des_vm = self.tvm.relax.VirtualMachine(executable, dev)
return des_vm
diff --git a/src/model_converters/tvm_converter/tvm_compiler.py b/src/model_converters/tvm_converter/tvm_compiler.py
index 9ffd71f6c..98844ac58 100644
--- a/src/model_converters/tvm_converter/tvm_compiler.py
+++ b/src/model_converters/tvm_converter/tvm_compiler.py
@@ -47,6 +47,11 @@ def cli_argument_parser():
default='Relay',
type=str,
dest='high_level_api')
+ parser.add_argument('--few_shot_tuning',
+ help='Apply FewShotTuning scheduling pass for RelaxVM '
+ 'to enable multi-threaded CPU inference.',
+ action='store_true',
+ dest='few_shot_tuning')
args = parser.parse_args()
return args
@@ -61,6 +66,7 @@ def create_dict_for_compilation(args):
'lib_name': args.lib_name,
'output_dir': args.output_dir,
'high_level_api': args.high_level_api,
+ 'few_shot_tuning': args.few_shot_tuning,
}
return dictionary
diff --git a/src/model_converters/tvm_converter/tvm_converter.py b/src/model_converters/tvm_converter/tvm_converter.py
index 86dea2c6d..7828cdeb8 100644
--- a/src/model_converters/tvm_converter/tvm_converter.py
+++ b/src/model_converters/tvm_converter/tvm_converter.py
@@ -71,6 +71,11 @@ def cli_argument_parser():
default='Relay',
type=str,
dest='high_level_api')
+ parser.add_argument('--few_shot_tuning',
+ help='Apply FewShotTuning scheduling pass for RelaxVM '
+ 'to enable multi-threaded CPU inference.',
+ action='store_true',
+ dest='few_shot_tuning')
args = parser.parse_args()
return args
@@ -87,6 +92,7 @@ def create_dict_for_converter(args):
'output_dir': args.output_dir,
'source_framework': args.source_framework,
'high_level_api': args.high_level_api,
+ 'few_shot_tuning': args.few_shot_tuning,
}
return dictionary
diff --git a/tests/smoke_test/benchmark_smoke/conftest.py b/tests/smoke_test/benchmark_smoke/conftest.py
index f2794f9ed..c890c0ea5 100644
--- a/tests/smoke_test/benchmark_smoke/conftest.py
+++ b/tests/smoke_test/benchmark_smoke/conftest.py
@@ -19,7 +19,7 @@
'person-detection-action-recognition-0006', 'person-detection-raisinghand-recognition-0001',
'person-detection-action-recognition-teacher-0002', 'yolo-v2-ava-0001', 'yolo-v2-tiny-ava-0001',
'yolo-v2-tf', 'yolo-v3-tf']
-DL_CAFFE_MODELS = ['googlenet-v1']
+DL_CAFFE_MODELS = []
def pytest_addoption(parser):
@@ -215,10 +215,8 @@ def pytest_generate_tests(metafunc):
param_list.append(smoke_test_params(**params))
id_list.append(config_file.stem)
- # Mark Caffe tests
+ # Wrap special-case configs (e.g. 'dgl') in pytest.param
for i, test_param in enumerate(param_list):
- if test_param.config_name in ['googlenet-v1_Caffe', 'googlenet-v1_TVM_Caffe', 'googlenet-v1_TVM']:
- param_list[i] = pytest.param(test_param, marks=pytest.mark.caffe)
if test_param.config_name in ['dgl']:
param_list[i] = pytest.param(test_param)
diff --git a/tests/smoke_test/configs/dl_models/googlenet-v1_Caffe.xml b/tests/smoke_test/configs/dl_models/googlenet-v1_Caffe.xml
deleted file mode 100644
index e1979c1f5..000000000
--- a/tests/smoke_test/configs/dl_models/googlenet-v1_Caffe.xml
+++ /dev/null
@@ -1,32 +0,0 @@
-
-
-
-
- classification
- googlenet-v1
- FP32
- Caffe
- ../models_dir/public/googlenet-v1/googlenet-v1.prototxt
- ../models_dir/public/googlenet-v1/googlenet-v1.caffemodel
-
-
- Data
- ../test_images/black_square.jpg
-
-
- Caffe
- 1
- CPU
- 5
- 1
- python3
-
-
-
- 104.0 117.0 123.0
-
-
-
-
-
-
\ No newline at end of file
diff --git a/tests/smoke_test/configs/dl_models/googlenet-v1_TVM.xml b/tests/smoke_test/configs/dl_models/googlenet-v1_TVM.xml
deleted file mode 100644
index c0622a31a..000000000
--- a/tests/smoke_test/configs/dl_models/googlenet-v1_TVM.xml
+++ /dev/null
@@ -1,38 +0,0 @@
-
-
-
-
- classification
- googlenet-v1
- FP32
- TVM
- ../models_dir/public/googlenet-v1/googlenet-v1.json
- ../models_dir/public/googlenet-v1/googlenet-v1.params
-
-
- Data
- ../test_images/black_square.jpg
-
-
- TVM
- 1
- CPU
- 5
- 1
- python3
-
-
- data
- TVM
- 1 3 224 224
- False
- 0.485 0.456 0.406
- 0.229 0.224 0.225
-
- NCHW
- llvm
- Relay
- 0
-
-
-
\ No newline at end of file
diff --git a/tests/smoke_test/configs/dl_models/googlenet-v1_TVM_Caffe.xml b/tests/smoke_test/configs/dl_models/googlenet-v1_TVM_Caffe.xml
deleted file mode 100644
index 50b0b63d5..000000000
--- a/tests/smoke_test/configs/dl_models/googlenet-v1_TVM_Caffe.xml
+++ /dev/null
@@ -1,38 +0,0 @@
-
-
-
-
- classification
- googlenet-v1
- FP32
- TVM
- ../models_dir/public/googlenet-v1/googlenet-v1.prototxt
- ../models_dir/public/googlenet-v1/googlenet-v1.caffemodel
-
-
- Data
- ../test_images/black_square.jpg
-
-
- TVM
- 1
- CPU
- 5
- 1
- python3
-
-
- data
- Caffe
- 1 3 224 224
- False
- 0.485 0.456 0.406
- 0.229 0.224 0.225
-
- NCHW
- Relay
- llvm
- 0
-
-
-
\ No newline at end of file