diff --git a/modules/compiler/targets/host/include/host/host_pass_machinery.h b/modules/compiler/targets/host/include/host/host_pass_machinery.h
index cad607cf4f..40f0f0e40d 100644
--- a/modules/compiler/targets/host/include/host/host_pass_machinery.h
+++ b/modules/compiler/targets/host/include/host/host_pass_machinery.h
@@ -21,14 +21,22 @@
#define HOST_PASSES_MACHINERY_H_INCLUDED
#include
-
+#include
#include
namespace llvm {
class TargetMachine;
}
+namespace vecz {
+ class VeczPassOptions;  // Forward declaration; defined by the vecz headers.
+}
namespace host {
+/// @brief Options produced by HostPassMachinery::processOptimizationOptions
+/// from the `CA_HOST_VF`, `CODEPLAY_VECZ_CHOICES` and
+/// `<prefix>_EARLY_LINK_BUILTINS` environment variables.
+struct OptimizationOptions {
+ llvm::SmallVector vecz_pass_opts;  ///< Option sets collected for the vecz pass.
+ bool force_no_tail = false;  ///< Set by the `V` flag of `CA_HOST_VF`: vectorize with no scalar tail.
+ bool early_link_builtins = false;  ///< Set by the `S` flag of `CA_HOST_VF` or the `_EARLY_LINK_BUILTINS` override; adds LinkBuiltinsPass ahead of the pre-vecz passes.
+};
class HostPassMachinery final : public compiler::BaseModulePassMachinery {
public:
@@ -63,6 +71,10 @@ class HostPassMachinery final : public compiler::BaseModulePassMachinery {
/// @brief Returns an optimization pass pipeline corresponding to
/// BaseModule::getLateTargetPasses.
llvm::ModulePassManager getLateTargetPasses();
+
+ /// @brief Builds the host optimization options from the environment
+ /// variables `CA_HOST_VF`, `CODEPLAY_VECZ_CHOICES` and
+ /// `<env_debug_prefix>_EARLY_LINK_BUILTINS`.
+ ///
+ /// @param env_debug_prefix Prefix of the `_EARLY_LINK_BUILTINS` override
+ /// variable; the override is only consulted when a prefix is supplied.
+ /// @param vecz_mode Vectorization mode; `AUTO` enables automatic
+ /// vectorization in the returned vecz options.
+ ///
+ /// @return The collected options.
+ static host::OptimizationOptions processOptimizationOptions(
+ std::optional env_debug_prefix,
+ std::optional vecz_mode);
};
} // namespace host
diff --git a/modules/compiler/targets/host/source/HostPassMachinery.cpp b/modules/compiler/targets/host/source/HostPassMachinery.cpp
index 665c743493..a550f93e1c 100644
--- a/modules/compiler/targets/host/source/HostPassMachinery.cpp
+++ b/modules/compiler/targets/host/source/HostPassMachinery.cpp
@@ -26,6 +26,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -55,6 +56,99 @@
namespace host {
+/// @brief Derives the host optimization options from compiler build options
+/// and common environment variables.
+///
+/// Consults `CA_HOST_VF` (vectorization flags), `CODEPLAY_VECZ_CHOICES`
+/// (vecz choices) and, when a prefix is supplied,
+/// `<prefix>_EARLY_LINK_BUILTINS`.
+///
+/// @param env_debug_prefix Prefix of the `_EARLY_LINK_BUILTINS` override
+/// variable; when it holds no value the override is not consulted.
+/// @param vecz_mode Requested vectorization mode; `AUTO` enables `vecz_auto`.
+///
+/// @return The collected options. `vecz_pass_opts` holds one entry, plus a
+/// second vector-predicated copy when `CA_HOST_VF` contains `VVP`.
+host::OptimizationOptions
+HostPassMachinery::processOptimizationOptions(
+    std::optional env_debug_prefix,
+    std::optional vecz_mode) {
+  OptimizationOptions env_var_opts;
+  vecz::VeczPassOptions vecz_opts;
+  // The minimum number of elements to vectorize for. For a fixed-length VF,
+  // this is the exact number of elements to vectorize by. For scalable VFs,
+  // the actual number of elements is a multiple (vscale) of these, unknown at
+  // compile time. Default taken from config. May be overridden later.
+  vecz_opts.factor = compiler::utils::VectorizationFactor::getScalar();
+
+  vecz_opts.choices.enable(vecz::VectorizationChoices::eDivisionExceptions);
+
+  vecz_opts.vecz_auto = vecz_mode == compiler::VectorizationMode::AUTO;
+  vecz_opts.vec_dim_idx = 0;
+
+  // This is of the form of a comma separated set of fields
+  // S     - use scalable vectorization
+  // V     - vectorize only, otherwise produce both scalar and vector kernels
+  // A     - let vecz automatically choose the vectorization factor
+  // 1-64  - vectorization factor multiplier: the fixed amount itself, or the
+  //         value that multiplies the scalable amount
+  // VP    - produce a vector-predicated kernel
+  // VVP   - produce both a vectorized and a vector-predicated kernel
+  bool add_vvp = false;
+  if (const auto *vecz_vf_flags_env = std::getenv("CA_HOST_VF")) {
+    // Set scalable to off and let users add it explicitly with 'S'.
+    vecz_opts.factor.setIsScalable(false);
+    llvm::SmallVector flags;
+    const llvm::StringRef vf_flags_ref(vecz_vf_flags_env);
+    vf_flags_ref.split(flags, ',');
+    for (auto r : flags) {
+      if (r == "A" || r == "a") {
+        vecz_opts.vecz_auto = true;
+      } else if (r == "V" || r == "v") {
+        // Note: This is a legacy toggle for forcing vectorization with no
+        // scalar tail based on the "VF" environment variable. Ideally we'd be
+        // setting it on a per-function basis, and we'd also be setting the
+        // vectorization options themselves on a per-function basis. Until we've
+        // designed a new method, keep the legacy behaviour by re-parsing the
+        // "VF" environment variable and look for a "v/V" toggle.
+        env_var_opts.force_no_tail = true;
+      } else if (r == "S" || r == "s") {
+        vecz_opts.factor.setIsScalable(true);
+        env_var_opts.early_link_builtins = true;
+      } else if (!r.empty() &&
+                 isdigit(static_cast<unsigned char>(r.front()))) {
+        // The emptiness check guards against an empty variable or a doubled or
+        // trailing comma, which yield empty fields; those fall through to the
+        // error branch below instead of indexing past the end of the field.
+        // consumeInteger reads the leading digits (as std::stoi did) but
+        // reports an out-of-range value instead of throwing.
+        unsigned known_min = 0;
+        if (r.consumeInteger(10, known_min)) {
+          // An error - just stop processing the environment variable now.
+          break;
+        }
+        vecz_opts.factor.setKnownMin(known_min);
+      } else if (r == "VP" || r == "vp") {
+        vecz_opts.choices.enable(
+            vecz::VectorizationChoices::eVectorPredication);
+      } else if (r == "VVP" || r == "vvp") {
+        // Add the vectorized pass option now (controlled by other iterations
+        // of this loop), and flag that we have to add a vector-predicated form
+        // later.
+        add_vvp = true;
+      } else {
+        // An error - just stop processing the environment variable now.
+        break;
+      }
+    }
+  }
+
+  // Choices override the cost model
+  if (const char *choices_env = std::getenv("CODEPLAY_VECZ_CHOICES")) {
+    const bool success = vecz_opts.choices.parseChoicesString(choices_env);
+    if (!success) {
+      llvm::errs() << "failed to parse the CODEPLAY_VECZ_CHOICES variable\n";
+    }
+  }
+
+  env_var_opts.vecz_pass_opts.push_back(vecz_opts);
+  if (add_vvp) {
+    // Second entry: same options with vector predication switched on.
+    vecz_opts.choices.enable(vecz::VectorizationChoices::eVectorPredication);
+    env_var_opts.vecz_pass_opts.push_back(vecz_opts);
+  }
+
+  // Allow any decisions made on early linking builtins to be overridden
+  // with an env variable
+  if (env_debug_prefix) {
+    const std::string env_name = *env_debug_prefix + "_EARLY_LINK_BUILTINS";
+    if (const char *early_link_builtins_env = std::getenv(env_name.c_str())) {
+      env_var_opts.early_link_builtins =
+          std::atoi(early_link_builtins_env) != 0;
+    }
+  }
+
+  return env_var_opts;
+}
+
+
static bool hostVeczPassOpts(
llvm::Function &F, llvm::ModuleAnalysisManager &MAM,
llvm::SmallVectorImpl &Opts) {
@@ -117,7 +211,16 @@ static bool hostVeczPassOpts(
vecz_options.factor =
compiler::utils::VectorizationFactor::getFixedWidth(SIMDWidth);
- Opts.push_back(vecz_options);
+ if (getenv("CA_HOST_VF")) {
+ auto env_var_opts = HostPassMachinery::processOptimizationOptions(
+ /*env_debug_prefix*/ {}, vecz_mode);
+ if (env_var_opts.vecz_pass_opts.empty()) {
+ return false;
+ }
+ Opts.assign(env_var_opts.vecz_pass_opts);
+ } else {
+ Opts.push_back(vecz_options);
+ }
return true;
}
@@ -178,6 +281,7 @@ void HostPassMachinery::registerPassCallbacks() {
bool HostPassMachinery::handlePipelineElement(llvm::StringRef Name,
llvm::ModulePassManager &PM) {
+
if (Name.consume_front("host-late-passes")) {
PM.addPass(getLateTargetPasses());
return true;
@@ -238,6 +342,9 @@ llvm::ModulePassManager HostPassMachinery::getKernelFinalizationPasses(
llvm::ModulePassManager PM;
const compiler::BasePassPipelineTuner tuner(options);
+ auto env_var_opts =
+ processOptimizationOptions("CA_HOST", /* vecz_mode*/ {});
+
// Forcibly compute the BuiltinInfoAnalysis so that cached retrievals work.
PM.addPass(llvm::RequireAnalysisPass());
@@ -246,6 +353,10 @@ llvm::ModulePassManager HostPassMachinery::getKernelFinalizationPasses(
PM.addPass(llvm::createModuleToFunctionPassAdaptor(
compiler::utils::ReplaceAddressSpaceQualifierFunctionsPass()));
+ if (env_var_opts.early_link_builtins) {
+ PM.addPass(compiler::utils::LinkBuiltinsPass());
+ }
+
addPreVeczPasses(PM, tuner);
PM.addPass(vecz::RunVeczPass());
diff --git a/modules/compiler/targets/host/source/info.cpp b/modules/compiler/targets/host/source/info.cpp
index f51b88735e..27ee6e6da1 100644
--- a/modules/compiler/targets/host/source/info.cpp
+++ b/modules/compiler/targets/host/source/info.cpp
@@ -101,7 +101,7 @@ HostInfo::HostInfo(host::arch arch, host::os os,
vectorizable = true;
dma_optimizable = true;
- scalable_vector_support = false;
+ scalable_vector_support = getenv("CA_HOST_VF") ? true : false;
kernel_debug = true;
#ifdef CA_ENABLE_DEBUG_SUPPORT
// Dummy values for testing. Enabled only on debug enabled builds with a
diff --git a/modules/compiler/targets/host/source/kernel.cpp b/modules/compiler/targets/host/source/kernel.cpp
index cb18017388..484387866f 100644
--- a/modules/compiler/targets/host/source/kernel.cpp
+++ b/modules/compiler/targets/host/source/kernel.cpp
@@ -300,11 +300,11 @@ HostKernel::lookupOrCreateOptimizedKernel(std::array local_size) {
}
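// Host doesn't support scalable values.
// NOTE(review): the guard that enforced this is commented out below, so
// scalable factors are no longer rejected here.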
- if (fn_metadata.min_work_item_factor.isScalable() ||
- fn_metadata.pref_work_item_factor.isScalable() ||
- fn_metadata.sub_group_size.isScalable()) {
- return cargo::make_unexpected(compiler::Result::FINALIZE_PROGRAM_FAILURE);
- }
+ // if (fn_metadata.min_work_item_factor.isScalable() ||
+ // fn_metadata.pref_work_item_factor.isScalable() ||
+ // fn_metadata.sub_group_size.isScalable()) {
+ // return cargo::make_unexpected(compiler::Result::FINALIZE_PROGRAM_FAILURE);
+ // }
// Note that we grab a handle to the module here, which we use to reference
// the module going forward. This is despite us passing ownership of the
diff --git a/modules/mux/targets/host/source/metadata_hooks.cpp b/modules/mux/targets/host/source/metadata_hooks.cpp
index fca30c959f..0e1f438766 100644
--- a/modules/mux/targets/host/source/metadata_hooks.cpp
+++ b/modules/mux/targets/host/source/metadata_hooks.cpp
@@ -78,17 +78,21 @@ cargo::optional readBinaryMetadata(loader::ElfFile *elf,
handler::VectorizeInfoMetadata md;
while (handler.read(md)) {
// We don't expect scalable vectorization widths on host.
+ bool isScalable = false;
if (md.min_work_item_factor.isScalable() ||
md.pref_work_item_factor.isScalable()) {
- return cargo::nullopt;
+ printf("Warning: Scalable support is experimental on host target\n");
+ // return cargo::nullopt;
+ isScalable = true;
}
const host::binary_kernel_s kernel{
/*hook*/ 0,
std::move(md.kernel_name),
+ // TODO: Work out sensible values for scalable.
static_cast(md.local_memory_usage),
- md.min_work_item_factor.getFixedValue(),
- md.pref_work_item_factor.getFixedValue(),
- md.sub_group_size.getFixedValue()};
+ isScalable ? 1 : md.min_work_item_factor.getFixedValue(),
+ isScalable ? 1 : md.pref_work_item_factor.getFixedValue(),
+ isScalable ? 1 : md.sub_group_size.getFixedValue()};
auto it = kernels.find(md.source_name);
if (it != kernels.end()) {
it->second.push_back(kernel);
diff --git a/source/cl/test/UnitCL/cmake/CompileKernelToBin.cmake b/source/cl/test/UnitCL/cmake/CompileKernelToBin.cmake
index 032883b42b..b1a7e75dc8 100644
--- a/source/cl/test/UnitCL/cmake/CompileKernelToBin.cmake
+++ b/source/cl/test/UnitCL/cmake/CompileKernelToBin.cmake
@@ -222,6 +222,7 @@ endif()
# ${CLC_EXECUTABLE} may have other things in it (like a qemu invocation). Turn
# it into a CMake list, so that execute_process() isn't confused.
string(REPLACE " " ";" CLC_EXECUTABLE "${CLC_EXECUTABLE}")
+# message("__CSD__ ${CLC_EXECUTABLE} -d ${DEVICE_NAME} -cl-kernel-arg-info -cl-std=CL${CLC_CL_STD} ${CLC_OPTIONS_LIST} ${DEFS_LIST} -o ${OUTPUT_FILE} -- ${INPUT_FILE}")
execute_process(
COMMAND ${CLC_EXECUTABLE}
-d ${DEVICE_NAME}
diff --git a/source/cl/test/UnitCL/kernels/task_01.02_add.bin b/source/cl/test/UnitCL/kernels/task_01.02_add.bin
new file mode 100644
index 0000000000..db338fa5de
Binary files /dev/null and b/source/cl/test/UnitCL/kernels/task_01.02_add.bin differ