diff --git a/CMakeLists.txt b/CMakeLists.txt index c1efd8bdcc..0209e87209 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -162,6 +162,28 @@ if ( DEFINED GBAVX512F ) endif ( ) endif ( ) +#------------------------------------------------------------------------------- +# RISC-V +#------------------------------------------------------------------------------- + +if ( DEFINED GBRISCV64 ) + if ( GBRISCV64 ) + # default: this is detected automatically, but can be set here also + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRISCV64=1 " ) + else ( ) + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRISCV64=0 " ) + endif ( ) +endif ( ) + +if ( DEFINED GBRVV ) + if ( GBRVV ) + # default: this is detected automatically, but can be set here also + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRVV=1 " ) + else ( ) + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRVV=0 " ) + endif ( ) +endif ( ) + #------------------------------------------------------------------------------- # check compiler features #------------------------------------------------------------------------------- diff --git a/FactoryKernels/GB_AxB__plus_times_fp32.c b/FactoryKernels/GB_AxB__plus_times_fp32.c index 0acbd8ca9b..ff98472642 100644 --- a/FactoryKernels/GB_AxB__plus_times_fp32.c +++ b/FactoryKernels/GB_AxB__plus_times_fp32.c @@ -49,6 +49,7 @@ // special case semirings: #define GB_SEMIRING_HAS_AVX_IMPLEMENTATION 1 +#define GB_SEMIRING_HAS_RVV_IMPLEMENTATION 1 // monoid properties: #define GB_Z_TYPE float @@ -287,6 +288,34 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp32) #endif + //---------------------------------------------------------------------- + // saxpy5 method with RISC-V vectors + //--------------------------------------------------------------------- + + #if GB_COMPILER_SUPPORTS_RVV1 + + #include + #define VSETVL(x) __riscv_vsetvl_e32m8(x) + #define VLE(x,y) __riscv_vle32_v_f32m8(x, y) + #define VFMACC(x,y,z,w) __riscv_vfmacc_vf_f32m8(x, y, z, w) + #define VSE(x,y,z) __riscv_vse32_v_f32m8(x, y, z) + #define VECTORTYPE vfloat32m8_t + + GB_TARGET_RVV1 static inline void GB_AxB_saxpy5_unrolled_rvv + ( + GrB_Matrix C, + const GrB_Matrix A, + const GrB_Matrix B, + const int ntasks, + const int nthreads, + const int64_t *B_slice + ) + { + #include "mxm/template/GB_AxB_saxpy5_lv.c" + } + + #endif + //---------------------------------------------------------------------- // saxpy5 method unrolled, with no vectors //---------------------------------------------------------------------- diff --git a/FactoryKernels/GB_AxB__plus_times_fp64.c b/FactoryKernels/GB_AxB__plus_times_fp64.c index 572ff1e8e4..e25ca0b158 100644 --- a/FactoryKernels/GB_AxB__plus_times_fp64.c +++ b/FactoryKernels/GB_AxB__plus_times_fp64.c @@ -49,6 +49,7 @@ // special case semirings: #define GB_SEMIRING_HAS_AVX_IMPLEMENTATION 1 +#define GB_SEMIRING_HAS_RVV_IMPLEMENTATION 1 // monoid properties: #define GB_Z_TYPE double @@ -287,6 +288,34 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp64) #endif + //---------------------------------------------------------------------- + // saxpy5 method with RISC-V vectors + //---------------------------------------------------------------------- + + #if GB_COMPILER_SUPPORTS_RVV1 + + #include + #define VSETVL(x) __riscv_vsetvl_e64m8(x) + #define VLE(x,y) __riscv_vle64_v_f64m8(x, y) + #define VFMACC(x,y,z,w) __riscv_vfmacc_vf_f64m8(x, y, z, w) + #define VSE(x,y,z) __riscv_vse64_v_f64m8(x, y, z) + #define VECTORTYPE vfloat64m8_t + + GB_TARGET_RVV1 static inline void GB_AxB_saxpy5_unrolled_rvv + ( + GrB_Matrix C, + const GrB_Matrix A, + const GrB_Matrix B, + const int ntasks, + const int nthreads, + const int64_t *B_slice + ) + { + #include "mxm/template/GB_AxB_saxpy5_lv.c" + } + + #endif + //---------------------------------------------------------------------- // saxpy5 method unrolled, with no vectors //---------------------------------------------------------------------- diff --git a/GraphBLAS/CMakeLists.txt b/GraphBLAS/CMakeLists.txt index c3ee54779b..c26b1053da 100644 --- a/GraphBLAS/CMakeLists.txt +++ b/GraphBLAS/CMakeLists.txt @@ -122,6 +122,28 @@ if ( DEFINED GBAVX512F ) endif ( ) endif ( ) +#------------------------------------------------------------------------------- +# RISC-V +#------------------------------------------------------------------------------- + +if ( DEFINED GBRISCV64 ) + if ( GBRISCV64 ) + # default: this is detected automatically, but can be set here also + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRISCV64=1 " ) + else ( ) + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRISCV64=0 " ) + endif ( ) +endif ( ) + +if ( DEFINED GBRVV ) + if ( GBRVV ) + # default: this is detected automatically, but can be set here also + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRVV=1 " ) + else ( ) + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRVV=0 " ) + endif ( ) +endif ( ) + #------------------------------------------------------------------------------- # determine build type #------------------------------------------------------------------------------- diff --git a/GraphBLAS/rename/GB_rename.h b/GraphBLAS/rename/GB_rename.h index cbfab16bf0..95e783b997 100644 --- a/GraphBLAS/rename/GB_rename.h +++ b/GraphBLAS/rename/GB_rename.h @@ -390,6 +390,7 @@ #define GB_Global_calloc_function_set GM_Global_calloc_function_set #define GB_Global_cpu_features_avx2 GM_Global_cpu_features_avx2 #define GB_Global_cpu_features_avx512f GM_Global_cpu_features_avx512f +#define GB_Global_cpu_features_rvv_1_0 GM_Global_cpu_features_rvv_1_0 #define GB_Global_cpu_features_query GM_Global_cpu_features_query #define GB_Global_flush_get GM_Global_flush_get #define GB_Global_flush_set GM_Global_flush_set diff --git a/Source/codegen/Generator/GB_AxB.c b/Source/codegen/Generator/GB_AxB.c index 3466347c57..0f52d6f6d7 100644 --- a/Source/codegen/Generator/GB_AxB.c +++ b/Source/codegen/Generator/GB_AxB.c @@ -7,6 +7,9 @@ //------------------------------------------------------------------------------ +#ifdef GBRISCV64 +#include +#endif #include "GB_control.h" GB_type_enabled #if GB_TYPE_ENABLED @@ -310,6 +313,28 @@ m4_divert(if_semiring_has_avx) } #endif +m4_divert(if_semiring_has_rvv) + //---------------------------------------------------------------------- + // saxpy5 method with RISC-V vectors + //---------------------------------------------------------------------- + + #if GB_COMPILER_SUPPORTS_RVV1 + + GB_TARGET_RVV1 static inline void GB_AxB_saxpy5_unrolled_rvv + ( + GrB_Matrix C, + const GrB_Matrix A, + const GrB_Matrix B, + const int ntasks, + const int nthreads, + const int64_t *B_slice + ) + { + #include "mxm/template/GB_AxB_saxpy5_lv.c" + } + + #endif + m4_divert(if_saxpy5_enabled) //---------------------------------------------------------------------- diff --git a/Source/cpu/GB_cpu_features.h b/Source/cpu/GB_cpu_features.h index bb9775b4fc..5ec7658d39 100644 --- a/Source/cpu/GB_cpu_features.h +++ b/Source/cpu/GB_cpu_features.h @@ -38,9 +38,13 @@ #include "cpu_features_macros.h" #define STACK_LINE_READER_BUFFER_SIZE 1024 #if GBX86 - // Intel x86 (also AMD): other architectures are not exploited + // Intel x86 (also AMD) #include "cpuinfo_x86.h" #endif + #if GBRISCV64 + // RISC-V + #include "cpuinfo_riscv.h" + #endif #endif #endif diff --git a/Source/cpu/GB_cpu_features_impl.c b/Source/cpu/GB_cpu_features_impl.c index a51a3f69d8..c4eb9bce6b 100644 --- a/Source/cpu/GB_cpu_features_impl.c +++ b/Source/cpu/GB_cpu_features_impl.c @@ -44,6 +44,7 @@ #include "src/impl_x86_freebsd.c" #include "src/impl_x86_linux_or_android.c" #include "src/impl_x86_windows.c" + #include "src/impl_riscv_linux.c" #if GBX86 #if (defined(__apple__) || defined(__APPLE__) || defined(__MACH__)) // needed for src/impl_x86_macos.c: diff --git a/Source/global/GB_Global.c b/Source/global/GB_Global.c index 27db605d04..477fa2696e 100644 --- a/Source/global/GB_Global.c +++ b/Source/global/GB_Global.c @@ -133,6 +133,7 @@ typedef struct bool cpu_features_avx2 ; // x86_64 with AVX2 bool cpu_features_avx512f ; // x86_64 with AVX512f + bool cpu_features_rvv_1_0 ; // RISC-V with RVV1.0 //-------------------------------------------------------------------------- // integer control @@ -226,6 +227,7 @@ static GB_Global_struct GB_Global = // CPU features .cpu_features_avx2 = false, // x86_64 with AVX2 .cpu_features_avx512f = false, // x86_64 with AVX512f + .cpu_features_rvv_1_0 = false, // RISC-V with RVV1.0 // integer control .p_control = (int8_t) 32, @@ -356,16 +358,48 @@ void GB_Global_cpu_features_query (void) } #endif + } + #elif GBRISCV64 + { + + //---------------------------------------------------------------------- + // RISC-V architecture: see if RVV1.0 is supported + //---------------------------------------------------------------------- + + #if !defined ( GBNCPUFEAT ) + { + // Google's cpu_features package is available: use run-time tests + RiscvFeatures features = GetRiscvInfo ().features ; + GB_Global.cpu_features_rvv_1_0 = (bool) (features.V) ; + + } + #else + { + #if defined ( GBRVV ) + { + // the build system asserts whether or not RVV1.0 is available + GB_Global.cpu_features_rvv_1_0 = (bool) (GBRVV) ; + } + #else + { + // RVV1.0 not available + GB_Global.cpu_features_rvv_1_0 = false ; + } + #endif + } + #endif + } #else { //---------------------------------------------------------------------- - // not on the x86_64 architecture, so no AVX2 or AVX512F acceleration + // not on the x86_64 or RISC-V architecture, so no AVX2, AVX512F or RVV1.0 acceleration //---------------------------------------------------------------------- GB_Global.cpu_features_avx2 = false ; GB_Global.cpu_features_avx512f = false ; + GB_Global.cpu_features_rvv_1_0 = false ; } #endif @@ -381,6 +415,11 @@ bool GB_Global_cpu_features_avx512f (void) return (GB_Global.cpu_features_avx512f) ; } +bool GB_Global_cpu_features_rvv_1_0 (void) +{ + return (GB_Global.cpu_features_rvv_1_0) ; +} + //------------------------------------------------------------------------------ // hyper_switch //------------------------------------------------------------------------------ diff --git a/Source/global/GB_Global.h b/Source/global/GB_Global.h index 47a77897e9..e54687ffa5 100644 --- a/Source/global/GB_Global.h +++ b/Source/global/GB_Global.h @@ -17,6 +17,7 @@ void GB_Global_cpu_features_query (void) ; bool GB_Global_cpu_features_avx2 (void) ; bool GB_Global_cpu_features_avx512f (void) ; +bool GB_Global_cpu_features_rvv_1_0 (void) ; void GB_Global_mode_set (int mode) ; int GB_Global_mode_get (void) ; diff --git a/Source/include/GB_compiler.h b/Source/include/GB_compiler.h index 85f3d1ff2f..6430ab9b44 100644 --- a/Source/include/GB_compiler.h +++ b/Source/include/GB_compiler.h @@ -245,6 +245,16 @@ #endif +#if !defined ( GBRISCV64 ) + + #if defined(__riscv) + #define GBRISCV64 1 + #else + #define GBRISCV64 0 + #endif + +#endif + //------------------------------------------------------------------------------ // AVX2 and AVX512F support for the x86_64 architecture //------------------------------------------------------------------------------ @@ -306,6 +316,31 @@ #define GB_TARGET_AVX2 #endif +//------------------------------------------------------------------------------ +// RVV1.0 support for the RISC-V architecture +//------------------------------------------------------------------------------ + +#if GBRISCV64 + #if GB_COMPILER_GCC + // TODO: add other compilers + #if __GNUC__ >= 13 + #define GB_COMPILER_SUPPORTS_RVV1 1 + #else + #define GB_COMPILER_SUPPORTS_RVV1 0 + #endif + #endif +#else + // non-RISC-V architecture + #define GB_COMPILER_SUPPORTS_RVV1 0 +#endif + +// prefix for function with target rvv1.0 +#if GB_COMPILER_SUPPORTS_RVV1 + #define GB_TARGET_RVV1 __attribute__ ((target ("arch=rv64gcv"))) +#else + #define GB_TARGET_RVV1 +#endif + //------------------------------------------------------------------------------ // disable Google's cpu_featgures on some compilers //------------------------------------------------------------------------------ diff --git a/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c b/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c index 36838a8363..09cafa5589 100644 --- a/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c +++ b/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c @@ -8,6 +8,9 @@ //------------------------------------------------------------------------------ #include "include/GB_AxB_saxpy3_template.h" +#ifdef GBRISCV64 +#include +#endif GB_JIT_GLOBAL GB_JIT_KERNEL_AXB_SAXPY5_PROTO (GB_jit_kernel) ; @@ -82,6 +85,27 @@ GB_JIT_GLOBAL GB_JIT_KERNEL_AXB_SAXPY5_PROTO (GB_jit_kernel) ; } #endif + + //---------------------------------------------------------------------- + // saxpy5 method with RISC-V vectors + //---------------------------------------------------------------------- + + #if GB_COMPILER_SUPPORTS_RVV1 + + GB_TARGET_RVV1 static inline void GB_AxB_saxpy5_unrolled_rvv + ( + GrB_Matrix C, + const GrB_Matrix A, + const GrB_Matrix B, + const int ntasks, + const int nthreads, + const int64_t *B_slice + ) + { + #include "template/GB_AxB_saxpy5_lv.c" + } + + #endif #endif @@ -172,6 +196,20 @@ GB_JIT_GLOBAL GB_JIT_KERNEL_AXB_SAXPY5_PROTO (GB_jit_kernel) } #endif + #if GB_SEMIRING_HAS_RVV_IMPLEMENTATION + { + #if GB_COMPILER_SUPPORTS_RVV1 + if (cpu_has_avx2) + { + // RISC-V64 with RVV1.0 + GB_AxB_saxpy5_unrolled_rvv (C, A, B, ntasks, nthreads, + B_slice) ; + return (GrB_SUCCESS) ; + } + #endif + } + #endif + // any architecture and any semiring GB_AxB_saxpy5_unrolled_vanilla (C, A, B, ntasks, nthreads, B_slice) ; diff --git a/Source/mxm/factory/GB_AxB_saxpy5_meta.c b/Source/mxm/factory/GB_AxB_saxpy5_meta.c index cd4851405f..4f26e0803b 100644 --- a/Source/mxm/factory/GB_AxB_saxpy5_meta.c +++ b/Source/mxm/factory/GB_AxB_saxpy5_meta.c @@ -143,6 +143,17 @@ } #endif #endif + + #if GB_SEMIRING_HAS_RVV_IMPLEMENTATION + #if GB_COMPILER_SUPPORTS_RVV1 + if (GB_Global_cpu_features_rvv_1_0 ( )) + { + GB_AxB_saxpy5_unrolled_rvv (C, A, B, + ntasks, nthreads, B_slice) ; + return (GrB_SUCCESS) ; + } + #endif + #endif // any architecture and any built-in semiring GB_AxB_saxpy5_unrolled_vanilla (C, A, B, ntasks, nthreads, B_slice) ; diff --git a/Source/mxm/include/GB_mxm_shared_definitions.h b/Source/mxm/include/GB_mxm_shared_definitions.h index 8ed47b3f83..85e1861ae1 100644 --- a/Source/mxm/include/GB_mxm_shared_definitions.h +++ b/Source/mxm/include/GB_mxm_shared_definitions.h @@ -71,6 +71,12 @@ #define GB_SEMIRING_HAS_AVX_IMPLEMENTATION 0 #endif +//1 if the semiring has a RVV1.0 implementation +#ifndef GB_SEMIRING_HAS_RVV_IMPLEMENTATION +#define GB_SEMIRING_HAS_RVV_IMPLEMENTATION 0 +#endif + + //------------------------------------------------------------------------------ // special multiply operators //------------------------------------------------------------------------------ diff --git a/Source/mxm/template/GB_AxB_saxpy5_lv.c b/Source/mxm/template/GB_AxB_saxpy5_lv.c new file mode 100644 index 0000000000..97b53ace43 --- /dev/null +++ b/Source/mxm/template/GB_AxB_saxpy5_lv.c @@ -0,0 +1,86 @@ +//------------------------------------------------------------------------------ +// GB_AxB_saxpy5_lv.c: C+=A*B when C is full +//------------------------------------------------------------------------------ + +{ + + //-------------------------------------------------------------------------- + // get C, A, and B + //-------------------------------------------------------------------------- + + const int64_t m = C->vlen; // # of rows of C and A + GB_Bp_DECLARE (Bp, const) ; GB_Bp_PTR (Bp, B) ; + GB_Bh_DECLARE (Bh, const) ; GB_Bh_PTR (Bh, B) ; + GB_Bi_DECLARE (Bi, const) ; GB_Bi_PTR (Bi, B) ; + const bool B_iso = B->iso ; + const GB_A_TYPE *restrict Ax = (GB_A_TYPE *)A->x; + const GB_B_TYPE *restrict Bx = (GB_B_TYPE *)B->x; + // get the max number of elements that vector can store + size_t vl = VSETVL(m); + GB_C_TYPE *restrict Cx = (GB_C_TYPE *)C->x; + + //-------------------------------------------------------------------------- + // C += A*B where A is full (and not iso or pattern-only) + //-------------------------------------------------------------------------- + + #pragma omp parallel for num_threads(nthreads) schedule(dynamic, 1) + for (int tid = 0; tid < ntasks; tid++) + { + // get the task descriptor + const int64_t jB_start = B_slice[tid]; + const int64_t jB_end = B_slice[tid + 1]; + // C(:,jB_start:jB_end-1) += A * B(:,jB_start:jB_end-1) + for (int64_t jB = jB_start; jB < jB_end; jB++) + { + // get B(:,j) and C(:,j) + const int64_t j = GBh_B (Bh, jB) ; + GB_C_TYPE *restrict Cxj = Cx + (j * m) ; + const int64_t pB_start = GB_IGET (Bp, jB) ; + const int64_t pB_end = GB_IGET (Bp, jB+1) ; + + //------------------------------------------------------------------ + // C(:,j) += A*B(:,j), on sets of vl rows of C and A at a time + //------------------------------------------------------------------ + + for (int64_t i = 0; i < m && (m - i) >= vl; i += vl) + { + // get C(i:i+vl,j) + VECTORTYPE vc = VLE(Cxj + i, vl); + for (int64_t pB = pB_start; pB < pB_end; pB++) + { + // bkj = B(k,j) + const int64_t k = GB_IGET (Bi, pB) ; + GB_DECLAREB (bkj) ; + GB_GETB (bkj, Bx, pB, B_iso) ; + // get A(i,k) + VECTORTYPE va = VLE(Ax + i + k * m, vl); + // C(i:i+15,j) += A(i:i+15,k)*B(k,j) + vc = VFMACC(vc, bkj, va, vl); + } + // save C(i:i+15,j) + VSE(Cxj + i, vc, vl); + } + + //------------------------------------------------------------------ + // lines 179-1036 from GB_AxB_saxpy5_unrolled.c + //------------------------------------------------------------------ + + int64_t remaining = m % vl; + if (remaining > 0) + { + int64_t i = m - remaining; + VECTORTYPE vc = VLE(Cxj + i, remaining); + for (int64_t pB = pB_start; pB < pB_end; pB++) + { + const int64_t k = GB_IGET (Bi, pB) ; + GB_DECLAREB (bkj) ; + GB_GETB (bkj, Bx, pB, B_iso) ; + VECTORTYPE va = VLE(Ax + i + k * m, remaining); + vc = VFMACC(vc, bkj, va, remaining); + } + + VSE(Cxj + i, vc, remaining); + } + } + } +} diff --git a/cpu_features/include/internal/hwcaps.h b/cpu_features/include/internal/hwcaps.h index 59e16576b8..fc078d4acf 100644 --- a/cpu_features/include/internal/hwcaps.h +++ b/cpu_features/include/internal/hwcaps.h @@ -134,6 +134,18 @@ CPU_FEATURES_START_CPP_NAMESPACE #define ARM_HWCAP2_SHA2 (1UL << 3) #define ARM_HWCAP2_CRC32 (1UL << 4) +// https://elixir.bootlin.com/linux/latest/source/arch/riscv/include/uapi/asm/hwcap.h +#define RISCV_HWCAP_32 0x32 +#define RISCV_HWCAP_64 0x64 +#define RISCV_HWCAP_128 0x128 +#define RISCV_HWCAP_M (1UL << ('M' - 'A')) +#define RISCV_HWCAP_A (1UL << ('A' - 'A')) +#define RISCV_HWCAP_F (1UL << ('F' - 'A')) +#define RISCV_HWCAP_D (1UL << ('D' - 'A')) +#define RISCV_HWCAP_Q (1UL << ('Q' - 'A')) +#define RISCV_HWCAP_C (1UL << ('C' - 'A')) +#define RISCV_HWCAP_V (1UL << ('V' - 'A')) + // http://elixir.free-electrons.com/linux/latest/source/arch/mips/include/uapi/asm/hwcap.h #define MIPS_HWCAP_R6 (1UL << 0) #define MIPS_HWCAP_MSA (1UL << 1) diff --git a/cpu_features/src/utils/list_cpu_features.c b/cpu_features/src/utils/list_cpu_features.c index 83cd387f08..00267e0091 100644 --- a/cpu_features/src/utils/list_cpu_features.c +++ b/cpu_features/src/utils/list_cpu_features.c @@ -423,6 +423,7 @@ static Node* CreateTree(void) { AddMapEntry(root, "microarchitecture", CreateString(strings.type.base_platform)); AddFlags(root, &info.features); +<<<<<<< HEAD #elif defined(CPU_FEATURES_ARCH_S390X) const S390XInfo info = GetS390XInfo(); const S390XPlatformStrings strings = GetS390XPlatformStrings(); @@ -431,16 +432,22 @@ static Node* CreateTree(void) { AddMapEntry(root, "model", CreateString(strings.type.platform)); AddMapEntry(root, "# processors", CreateInt(strings.num_processors)); AddFlags(root, &info.features); +======= +>>>>>>> 2bc06c36d7 (fix: add missing riscv defines into cpu_features) #elif defined(CPU_FEATURES_ARCH_RISCV) const RiscvInfo info = GetRiscvInfo(); AddMapEntry(root, "arch", CreateString("risc-v")); AddMapEntry(root, "vendor", CreateString(info.vendor)); AddMapEntry(root, "microarchitecture", CreateString(info.uarch)); +<<<<<<< HEAD AddFlags(root, &info.features); #elif defined(CPU_FEATURES_ARCH_LOONGARCH) const LoongArchInfo info = GetLoongArchInfo(); AddMapEntry(root, "arch", CreateString("loongarch")); AddFlags(root, &info.features); +======= + AddFlags(root, &info.features); +>>>>>>> 2bc06c36d7 (fix: add missing riscv defines into cpu_features) #endif return root; }