From 6ffb9d46e0bb4750e123300bb2a12ceb167113ef Mon Sep 17 00:00:00 2001 From: Lingxiao Ma Date: Sun, 28 Nov 2021 16:57:14 +0900 Subject: [PATCH 1/2] Support -ftuning_list in kernel tuning pass --- .../engine/pass/graph/kernel_tuning.cpp | 30 ++++++++++++++++++- .../engine/pass/graph/kernel_tuning.hpp | 2 ++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/nnfusion/engine/pass/graph/kernel_tuning.cpp b/src/nnfusion/engine/pass/graph/kernel_tuning.cpp index b9a699c5a..6b689e005 100644 --- a/src/nnfusion/engine/pass/graph/kernel_tuning.cpp +++ b/src/nnfusion/engine/pass/graph/kernel_tuning.cpp @@ -20,6 +20,7 @@ DEFINE_int64(fkernel_tuning_steps, 0, "Enable automatic kernel tuning for maximu DEFINE_string(ftuning_blocklist, "", "List of op types that skip kernel tuning pass, e.g., \"Softmax,Add\""); +DEFINE_string(ftuning_list, "", "List of op types for kernel tuning pass, e.g., \"Softmax,Add\""); DEFINE_string(fantares_perf_file, "./antares_perf.csv", "File to save Antares kernel performance."); DECLARE_bool(fantares_mode); DECLARE_string(fantares_codegen_server); @@ -130,6 +131,7 @@ void dump_perf(std::string filename, std::pair>, std::vector>> get_tuning_candidates(std::shared_ptr& graph, + const std::unordered_set tuning_list, const std::unordered_set block_list, std::unordered_map& ir2cnt) { @@ -147,6 +149,12 @@ std::pair>, std::vector(); NNFUSION_CHECK(n_device_type != UNKNOWN); + // filter ops not in TuningList + if (tuning_list.find(gnode->get_op_type()) == tuning_list.end()) + { + continue; + } + // filter ops in BlockList if (block_list.find(gnode->get_op_type()) != block_list.end()) { @@ -255,11 +263,31 @@ bool KernelTuning::parse_block_list() NNFUSION_LOG(INFO) << "Kernel Tuning BlockList: " << join(BlockList, ", "); } +bool KernelTuning::parse_tuning_list() +{ + auto tuninglist_str = FLAGS_ftuning_list; + stringstream ss(tuninglist_str); + while (ss.good()) + { + string substr; + getline(ss, substr, ','); + TuningList.insert(substr); + } + NNFUSION_LOG(INFO) << "Kernel Tuning List: " << join(TuningList, ", "); +} + bool KernelTuning::run_on_graph(std::shared_ptr& graph) { if (FLAGS_fantares_mode) { + parse_tuning_list(); parse_block_list(); + for (auto item : TuningList) + { + NNFUSION_CHECK(BlockList.find(item) == BlockList.end()) + << "Kernel Tuning Pass: There are same operators in TuningList and " + "TuningBlockList."; + } // register antares kernels anyway here in case kernel selection pass will use them register_antares_kernel(); } @@ -274,7 +302,7 @@ bool KernelTuning::run_on_graph(std::shared_ptr& graph) std::vector> tuning_kernels; std::unordered_map ir2cnt; std::vector> nodes; - std::tie(nodes, tuned_kernels) = get_tuning_candidates(graph, BlockList, ir2cnt); + std::tie(nodes, tuned_kernels) = get_tuning_candidates(graph, TuningList, BlockList, ir2cnt); for (auto gnode : nodes) { if (!(*gnode)["DeviceType"].is_valid()) diff --git a/src/nnfusion/engine/pass/graph/kernel_tuning.hpp b/src/nnfusion/engine/pass/graph/kernel_tuning.hpp index 1d49761e2..67cf3a592 100644 --- a/src/nnfusion/engine/pass/graph/kernel_tuning.hpp +++ b/src/nnfusion/engine/pass/graph/kernel_tuning.hpp @@ -22,11 +22,13 @@ namespace nnfusion private: bool parse_block_list(); + bool parse_tuning_list(); bool insert_to_kernel_cache( const std::vector>& nodes); private: std::unordered_set BlockList; + std::unordered_set TuningList; }; } } From 5332e23445537c57e3ca581a59b3adbc3dd46f19 Mon Sep 17 00:00:00 2001 From: Lingxiao Ma Date: Sun, 5 Dec 2021 14:00:27 +0800 Subject: [PATCH 2/2] fix bug in AntaresCudaKernelEmitter when IR is empty --- src/nnfusion/core/kernels/cuda_gpu/cuda_emitter.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/nnfusion/core/kernels/cuda_gpu/cuda_emitter.hpp b/src/nnfusion/core/kernels/cuda_gpu/cuda_emitter.hpp index fada9792d..baf5b3551 100644 --- a/src/nnfusion/core/kernels/cuda_gpu/cuda_emitter.hpp +++ b/src/nnfusion/core/kernels/cuda_gpu/cuda_emitter.hpp @@ -287,6 +287,7 @@ namespace nnfusion << ctx->gnode->get_op_type(); log_cache.insert(ctx->gnode->get_op_type()); } + return; } kernel_info =