Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 0 additions & 39 deletions .github/workflows/smoke_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,45 +52,6 @@ jobs:
path: tests/smoke_test/ac_smoke/report.html
retention-days: 1

smoke_test_benchmark_caffe:
strategy:
matrix:
os: [ubuntu-22.04]
runs-on: ${{ matrix.os }}
steps:
- name: Code checkout
uses: actions/checkout@v3

- name: Setting up miniconda
uses: conda-incubator/setup-miniconda@v2
with:
miniconda-version: "latest"
auto-update-conda: true
activate-environment: caffe_env_3.7
python-version: 3.7

- name: Setting up Python dependencies and Caffe
shell: bash -el {0}
run: |
python -m pip install --upgrade pip
python -m pip install -r requirements_ci.txt
python -m pip install openvino-dev
python -m pip install apache-tvm
conda install -y caffe

- name: Run smoke test for inference benchmark
shell: bash -el {0}
run: |
cd tests/smoke_test/benchmark_smoke && python -m pytest test_benchmark_smoke.py -m="caffe"

- name: Upload benchmark caffe artifacts
uses: actions/upload-artifact@v4
if: always()
with:
name: benchmark_report_caffe
path: tests/smoke_test/benchmark_smoke/report.html
retention-days: 1

smoke_test_quantization:
strategy:
matrix:
Expand Down
11 changes: 10 additions & 1 deletion src/benchmark/frameworks/tvm/tvm_parameters_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def parse_parameters(self, curr_test):
CONFIG_FRAMEWORK_DEPENDENT_CHANNEL_SWAP_TAG = 'ChannelSwap'
CONFIG_FRAMEWORK_DEPENDENT_LAYOUT_TAG = 'Layout'
CONFIG_FRAMEWORK_DEPENDENT_HIGH_LEVEL_API = 'HighLevelAPI'
CONFIG_FRAMEWORK_DEPENDENT_FEW_SHOT_TUNING = 'FewShotTuning'

dep_parameters_tag = curr_test.getElementsByTagName(CONFIG_FRAMEWORK_DEPENDENT_TAG)[0]

Expand All @@ -42,6 +43,10 @@ def parse_parameters(self, curr_test):
_high_level_api = dep_parameters_tag.getElementsByTagName(
CONFIG_FRAMEWORK_DEPENDENT_HIGH_LEVEL_API)[0].firstChild

_few_shot_tuning_tags = dep_parameters_tag.getElementsByTagName(
CONFIG_FRAMEWORK_DEPENDENT_FEW_SHOT_TUNING)
_few_shot_tuning = _few_shot_tuning_tags[0].firstChild if _few_shot_tuning_tags else None

return TVMParameters(
framework=_framework.data if _framework else None,
input_name=_input_name.data if _input_name else None,
Expand All @@ -54,14 +59,15 @@ def parse_parameters(self, curr_test):
layout=_layout.data if _layout else None,
target=_target.data if _target else None,
high_level_api=_high_level_api.data if _high_level_api else None,
few_shot_tuning=_few_shot_tuning.data if _few_shot_tuning else None,
)


class TVMParameters(FrameworkParameters):
def __init__(self, framework, input_name, input_shape,
normalize, mean, std, channel_swap,
optimization_level, layout, target,
high_level_api):
high_level_api, few_shot_tuning=None):
self.framework = None
self.input_name = None
self.input_shape = None
Expand All @@ -73,6 +79,7 @@ def __init__(self, framework, input_name, input_shape,
self.layout = None
self.target = 'llvm'
self.high_level_api = None
self.few_shot_tuning = None

if self._framework_is_correct(framework):
self.framework = framework
Expand All @@ -96,6 +103,8 @@ def __init__(self, framework, input_name, input_shape,
self.target = target
if self._parameter_is_not_none(high_level_api):
self.high_level_api = high_level_api
if self._parameter_is_not_none(few_shot_tuning):
self.few_shot_tuning = few_shot_tuning

@staticmethod
def _framework_is_correct(framework):
Expand Down
10 changes: 8 additions & 2 deletions src/benchmark/frameworks/tvm/tvm_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,19 @@ def _fill_command_line(self):
common_params, '--opt_level', opt_level)

target = self._test.dep_parameters.target
common_params = TVMProcess._add_optional_argument_to_cmd_line(
common_params, '--target', target)
if target:
common_params = TVMProcess._add_argument_to_cmd_line(
common_params, '--target', f'"{target}"')

high_level_api = self._test.dep_parameters.high_level_api
common_params = TVMProcess._add_optional_argument_to_cmd_line(
common_params, '--high_level_api', high_level_api)

few_shot_tuning = self._test.dep_parameters.few_shot_tuning
if few_shot_tuning == 'True':
common_params = TVMProcess._add_flag_to_cmd_line(
common_params, '--few_shot_tuning')

return f'{common_params}'


Expand Down
2 changes: 2 additions & 0 deletions src/benchmark/frameworks/tvm/tvm_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ def get_report(self, process):
parameters.update({'Framework': self.dep_parameters.framework})
parameters.update({'HighLevelAPI': self.dep_parameters.high_level_api})
parameters.update({'Optimization level': self.dep_parameters.optimization_level})
if self.dep_parameters.few_shot_tuning == 'True':
parameters.update({'FewShotTuning': 'True'})
other_param = self._get_optional_parameters_string(parameters)

report_res = {
Expand Down
6 changes: 5 additions & 1 deletion src/configs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,9 @@
- `HighLevelAPI` - тег, необязательный для заполнения. Определяет используемое высокоуровневое API: `Relay`, `RelayVM` или `RelaxVM`. По умолчанию задается значение `Relay`.
- `OptimizationLevel` - тег, необязательный для заполнения. Определяет уровень оптимизаций для
графа вычислений, которые ускоряют инференс. По умолчанию оптимизации не применяются.
- `FewShotTuning` - тег, необязательный для заполнения. Применяет FewShotTuning — быструю
настройку расписаний ядер для многопоточного вывода на CPU. Допустимые значения: `True`, `False`.
Применимо только для `RelaxVM`. По умолчанию не установлен.
- `Framework` - тег, обязательный для заполнения. Определяет фреймворк, модели которого будут
запущены средствами Apache TVM. По умолчанию задается фреймворк `TVM`.

Expand Down Expand Up @@ -781,9 +784,10 @@
<Std>0.229 0.224 0.225</Std>
<ChannelSwap></ChannelSwap>
<Layout>NCHW</Layout>
<Target>llvm</Target>
<Target>llvm -num-cores=4</Target>
<HighLevelAPI>RelaxVM</HighLevelAPI>
<OptimizationLevel>3</OptimizationLevel>
<FewShotTuning>True</FewShotTuning>
</FrameworkDependent>
</Test>
```
Expand Down
1 change: 1 addition & 0 deletions src/configs/benchmark_configuration_file_template.xml
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,7 @@
<HighLevelAPI></HighLevelAPI>
<Target></Target>
<OptimizationLevel></OptimizationLevel>
<FewShotTuning></FewShotTuning> <!--True для RelaxVM: применяет FewShotTuning для многопоточного CPU-->
</FrameworkDependent>
</Test>
<Test>
Expand Down
3 changes: 3 additions & 0 deletions src/inference/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -961,6 +961,9 @@ inference_tvm.py
- `-ol / --opt_level` - параметр, определяющий уровень оптимизации
графа вычислений нейронной сети для ускорения инференса. По умолчанию
оптимизации не применяются.
- `--few_shot_tuning` - применить FewShotTuning — быструю настройку расписаний
ядер для многопоточного вывода на CPU. Применимо только для `RelaxVM`.
По умолчанию не установлен.
- `--raw_output` - работа скрипта без логов. По умолчанию не установлен.

Аргументы, необходимые для инференса моделей MXNet с использованием Apache TVM:
Expand Down
5 changes: 5 additions & 0 deletions src/inference/inference_tvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,11 @@ def cli_argument_parser():
default='Relay',
type=str,
dest='high_level_api')
parser.add_argument('--few_shot_tuning',
help='Apply FewShotTuning scheduling pass for RelaxVM '
'to enable multi-threaded CPU inference.',
action='store_true',
dest='few_shot_tuning')
parser.add_argument('--raw_output',
help='Raw output without logs.',
default=False,
Expand Down
1 change: 1 addition & 0 deletions src/inference/tvm_auxiliary.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ def create_dict_for_converter(args):
'module': args.module,
'high_level_api': args.high_level_api,
'source_framework': args.source_framework,
'few_shot_tuning': getattr(args, 'few_shot_tuning', False),
}
return dictionary

Expand Down
10 changes: 9 additions & 1 deletion src/model_converters/tvm_converter/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ This script converts model from `<source_framework>` to the TVM format.
- `-d / --device` is a target device for inference. It equals `CPU`
by default.
- `-op / --output_dir` is path to save the model.
- `--high_level_api` is the high-level API to use: `Relay`, `RelayVM` or `RelaxVM`.
It equals `Relay` by default.
- `--few_shot_tuning` applies the FewShotTuning scheduling pass for `RelaxVM`
to enable multi-threaded CPU inference. Disabled by default.

### Examples of usage

Expand Down Expand Up @@ -83,6 +87,8 @@ for the Relay API or to the `.so`+`.ro` format for the VirtualMachine API.
- `-t / --target` is target device information, for example `llvm` for CPU.
- `--opt_level` is the optimization level of the task extractions.
- `--high_level_api` is the high-level API to use: `Relay`, `RelayVM` or `RelaxVM`.
- `--few_shot_tuning` applies the FewShotTuning scheduling pass for `RelaxVM`
to enable multi-threaded CPU inference. Disabled by default.
- `--lib_name` is a file name to save compiled model.
- `-op / --output_dir` is a path to save the model.

Expand All @@ -95,5 +101,7 @@ python3 ./tvm_compiler.py -m efficientnet-b0.json -p efficientnet-b0.params \

```sh
python3 ./tvm_compiler.py -m resnet50.json -p resnet50.params \
-t llvm --opt_level 1 --lib_name resnet50.so
-t "llvm -num-cores=16" --opt_level 3 \
--high_level_api RelaxVM --few_shot_tuning \
--lib_name resnet50.so
```
55 changes: 51 additions & 4 deletions src/model_converters/tvm_converter/tvm_auxiliary/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def __init__(self, args):
self.target_str = args.get('target', None)
self.module = args.get('module', None)
self.high_level_api = args.get('high_level_api', None)
self.few_shot_tuning = args.get('few_shot_tuning', False)

self.output_dir = args.get('output_dir', None)
self.lib_name = args.get('lib_name', None)
Expand Down Expand Up @@ -112,8 +113,10 @@ def save_tvm_model(self):
fo.write(self.tvm.ir.save_json(self.mod))

def get_graph_module_from_lib(self, lib):
if self.high_level_api in ['Relay', 'RelayVM']:
if self.high_level_api == 'Relay':
return self.__get_graph_module_from_relay_lib(lib)
elif self.high_level_api == 'RelayVM':
return self.__get_graph_module_from_relay_vm_lib(lib)
elif self.high_level_api == 'RelaxVM':
return self.__get_graph_module_from_relax_vm_lib(lib)
else:
Expand All @@ -124,6 +127,11 @@ def __get_graph_module_from_relay_lib(self, lib):
self.graph = self.graph_executor.GraphModule(lib['default'](dev))
return self.graph

def __get_graph_module_from_relay_vm_lib(self, lib):
    """Wrap an already loaded library in a Relay ``VirtualMachine``.

    :param lib: object handed unchanged as the first argument to
        ``tvm.runtime.vm.VirtualMachine`` (presumably the loaded Relay VM
        executable -- confirm against the caller).
    :return: the ``VirtualMachine`` instance created for the device
        reported by ``self._get_target_device()``.
    """
    # Only the second element (the device) of the returned pair is used.
    _unused, device = self._get_target_device()
    return self.tvm.runtime.vm.VirtualMachine(lib, device)

def __get_graph_module_from_relax_vm_lib(self, lib):
_, dev = self._get_target_device()
des_vm = self.tvm.relax.VirtualMachine(lib, dev)
Expand Down Expand Up @@ -155,9 +163,46 @@ def __get_lib_from_relay_vm(self, target, model):
code, lib = executable.save()
return code, lib

def _build_relax_vm_pipeline(self):
    """Assemble the pass pipeline used when building a model for Relax VM.

    The result is a module pass (registered with ``opt_level=0``) that runs
    a fixed sequence of ``tvm.relax.transform`` passes. When
    ``self.few_shot_tuning`` is truthy,
    ``FewShotTuning(valid_count=1, benchmark=False)`` is inserted right
    after ``FuseTIR`` and before ``RewriteDataflowReshape``.

    :return: module pass that can be given as the ``pipeline`` argument of
        ``tvm.relax.build``.
    """
    transform = self.tvm.relax.transform

    # Pass names are listed in execution order; each one is instantiated
    # with no arguments.
    fusion_stage = (
        'LegalizeOps',
        'AnnotateTIROpPattern',
        'FoldConstant',
        'FuseOps',
        'FuseTIR',
    )
    lowering_stage = (
        'RewriteDataflowReshape',
        'ToNonDataflow',
        'RemovePurityChecking',
        'CallTIRRewrite',
        'StaticPlanBlockMemory',
        'LowerAllocTensor',
        'KillAfterLastUse',
        'LowerRuntimeBuiltin',
        'ComputePrimValue',
        'VMShapeLower',
        'AttachGlobalSymbol',
    )

    stages = [getattr(transform, name)() for name in fusion_stage]

    # The optional tuning pass sits between fusion and lowering.
    if self.few_shot_tuning:
        self.log.info('Applying FewShotTuning for Relax VM')
        stages.append(transform.FewShotTuning(valid_count=1, benchmark=False))

    stages.extend(getattr(transform, name)() for name in lowering_stage)

    @self.tvm.transform.module_pass(opt_level=0)
    def pipeline(mod, _ctx):
        # The Sequential wrapper is created on every invocation of the pass.
        sequence = self.tvm.transform.Sequential(stages)
        return sequence(mod)

    return pipeline

def __get_lib_from_relax_vm(self, target, model):
pipeline = self._build_relax_vm_pipeline()
with self.tvm.transform.PassContext(opt_level=self.opt_level):
lib = self.tvm.relax.build(model[0], target=target, params=model[1])
lib = self.tvm.relax.build(model[0], target=target, params=model[1],
pipeline=pipeline)
return [lib]

def export_lib(self):
Expand Down Expand Up @@ -187,7 +232,7 @@ def __get_graph_module_from_relay(self, mod, params, target, dev):
def __get_graph_module_from_relay_vm(self, mod, params, target, dev):
vm = self.tvm.runtime.vm
rly_vm = self.tvm.relay.vm
if self.mod_type == 'so' and self.params_type == 'ro':
if self.mod_type in ['so', 'tar'] and self.params_type == 'ro':
executable = vm.Executable.load_exec(params, mod)
else:
with self.tvm.transform.PassContext(opt_level=self.opt_level):
Expand All @@ -196,8 +241,10 @@ def __get_graph_module_from_relay_vm(self, mod, params, target, dev):
return des_vm

def __get_graph_module_from_relax_vm(self, mod, params, target, dev):
pipeline = self._build_relax_vm_pipeline()
with self.tvm.transform.PassContext(opt_level=self.opt_level):
executable = self.tvm.relax.build(mod, target=target, params=params)
executable = self.tvm.relax.build(mod, target=target, params=params,
pipeline=pipeline)
des_vm = self.tvm.relax.VirtualMachine(executable, dev)
return des_vm

Expand Down
6 changes: 6 additions & 0 deletions src/model_converters/tvm_converter/tvm_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ def cli_argument_parser():
default='Relay',
type=str,
dest='high_level_api')
parser.add_argument('--few_shot_tuning',
help='Apply FewShotTuning scheduling pass for RelaxVM '
'to enable multi-threaded CPU inference.',
action='store_true',
dest='few_shot_tuning')
args = parser.parse_args()
return args

Expand All @@ -61,6 +66,7 @@ def create_dict_for_compilation(args):
'lib_name': args.lib_name,
'output_dir': args.output_dir,
'high_level_api': args.high_level_api,
'few_shot_tuning': args.few_shot_tuning,
}
return dictionary

Expand Down
6 changes: 6 additions & 0 deletions src/model_converters/tvm_converter/tvm_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ def cli_argument_parser():
default='Relay',
type=str,
dest='high_level_api')
parser.add_argument('--few_shot_tuning',
help='Apply FewShotTuning scheduling pass for RelaxVM '
'to enable multi-threaded CPU inference.',
action='store_true',
dest='few_shot_tuning')
args = parser.parse_args()
return args

Expand All @@ -87,6 +92,7 @@ def create_dict_for_converter(args):
'output_dir': args.output_dir,
'source_framework': args.source_framework,
'high_level_api': args.high_level_api,
'few_shot_tuning': args.few_shot_tuning,
}
return dictionary

Expand Down
6 changes: 2 additions & 4 deletions tests/smoke_test/benchmark_smoke/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
'person-detection-action-recognition-0006', 'person-detection-raisinghand-recognition-0001',
'person-detection-action-recognition-teacher-0002', 'yolo-v2-ava-0001', 'yolo-v2-tiny-ava-0001',
'yolo-v2-tf', 'yolo-v3-tf']
DL_CAFFE_MODELS = ['googlenet-v1']
DL_CAFFE_MODELS = []


def pytest_addoption(parser):
Expand Down Expand Up @@ -215,10 +215,8 @@ def pytest_generate_tests(metafunc):
param_list.append(smoke_test_params(**params))
id_list.append(config_file.stem)

# Mark Caffe tests
# Mark tests
for i, test_param in enumerate(param_list):
if test_param.config_name in ['googlenet-v1_Caffe', 'googlenet-v1_TVM_Caffe', 'googlenet-v1_TVM']:
param_list[i] = pytest.param(test_param, marks=pytest.mark.caffe)
if test_param.config_name in ['dgl']:
param_list[i] = pytest.param(test_param)

Expand Down
Loading
Loading