Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions examples/targets/carfield/config/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ export PULP_SDK_HOME = $(PULPRT_HOME)
PULP_APPS = pulp_cluster_runtime
PULP_APP_SRCS = $(MATCH_COMMON_SRCS) $(MATCH_pulp_cluster_OFFLOAD_SRC) $(PULPRT_HOME)/lib/libc/minimal/io.c $(PULPRT_HOME)/lib/libc/minimal/prf.c

PULP_CFLAGS = -O3 $(MATCH_INCLUDES) -DCLUSTER_COMPILATION -DARCHI_CLUSTER_NB_PE=8 -I$(PULPRT_HOME)/lib/libc/minimal/include -D__pulp_cluster__
PULP_CFLAGS = -O3 $(MATCH_INCLUDES) -DCLUSTER_COMPILATION -DARCHI_CLUSTER_NB_PE=8 -I$(PULPRT_HOME)/lib/libc/minimal/include -D__pulp_cluster__ -Dhalf=float16 -D_Float16=float16
PULPD_ELF_REMOVE_SECTIONS := --remove-section .l1cluster_g --remove-section .bss_l1

-include $(PULP_SDK_HOME)/install/rules/pulp.mk
Expand All @@ -61,7 +61,7 @@ build-offload:
$(PULPD_RISCV)-objcopy $(PULPD_ELF_REMOVE_SECTIONS) $(BUILD_DIR)/build/pulp_cluster_runtime/pulp_cluster_runtime;

@echo "Generating objdump..."
$(PULPD_RISCV)-objdump -d -S $(BUILD_DIR)/build/pulp_cluster_runtime/pulp_cluster_runtime > $(BUILD_DIR)/build/pulp_cluster_runtime/pulp_cluster_runtime.dump;
$(PULPD_RISCV)-objdump -drwCS $(BUILD_DIR)/build/pulp_cluster_runtime/pulp_cluster_runtime > $(BUILD_DIR)/build/pulp_cluster_runtime/pulp_cluster_runtime.dump;

@echo "Runtime offload build done."

Expand All @@ -74,7 +74,7 @@ build-offload:
CAR_SW_DIR := $(CAR_ROOT)/sw
CHS_ROOT ?= $(shell $(BENDER) path cheshire)

CHS_SW_GCC_BINROOT ?= /usr/pack/riscv-1.0-kgf/riscv64-gcc-11.2.0/bin
CHS_SW_GCC_BINROOT ?= /usr/pack/riscv-1.0-kgf/riscv64-gcc-14.2.0/bin

-include $(CHS_ROOT)/cheshire.mk
CHS_BOOTMODE ?= 0 # default passive bootmode
Expand Down Expand Up @@ -105,7 +105,7 @@ $(HOST_LIB): $(HOST_LIB_SRCS_O)
$(CAR_SW_DIR)/%.car.o: $(CAR_SW_DIR)/%.c
$(CHS_SW_CC) $(CAR_SW_INCLUDES) $(CHS_SW_CCFLAGS) -c $< -o $@

HOST_FLAGS := -T$(HOST_LD_SCRIPT) -Wno-pointer-to-int-cast -DIntClustNumCores=8 -g
HOST_FLAGS := -T$(HOST_LD_SCRIPT) -Wno-pointer-to-int-cast -DIntClustNumCores=8 -Dhalf=_Float16 -g -march=rv64gc_zifencei



Expand All @@ -123,5 +123,5 @@ build-host: $(HOST_LIB) build-payload
@echo $(HOST_LIB_SRCS_O)
$(CHS_SW_CC) $(HOST_INCLUDES) $(MATCH_INCLUDES) $(CHS_SW_LDFLAGS) $(HOST_FLAGS) -o $(BUILD_DIR)/host.elf $(HOST_LIB) $(MATCH_COMMON_SRCS) $(MATCH_HOST_SRC) $(CHS_SW_LIBS)
@echo "Generating objdump"
@$(CHS_SW_OBJDUMP) -d -S $(BUILD_DIR)/host.elf > $(BUILD_DIR)/host.dump
@$(CHS_SW_OBJDUMP) -drwCS $(BUILD_DIR)/host.elf > $(BUILD_DIR)/host.dump
@echo "Host build done"
6 changes: 2 additions & 4 deletions examples/targets/carfield/config/link.ld
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,9 @@ SECTIONS {
*(.bulk.*)
} > l2

.l2_heap : ALIGN(32) {
.l2_heap (NOLOAD) : ALIGN(32) {
__l2_heap_start = .;
*(.l2_heap)
*(.l2_heap.*)
. = ALIGN(32);
. = ORIGIN(l2) + LENGTH(l2) - LENGTH(l2_common);
__l2_heap_end = .;
} > l2

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ void carfield_free_ram(void* ext, size_t size);
extern volatile uint32_t last_completed_node_id;
extern volatile uint32_t last_task_error_code;

#define GLOBAL_IRQ_ENABLE 0x00001808
#define GLOBAL_IRQ_ENABLE (1UL << 3)
#define EXTERNAL_IRQ_ENABLE 0x00000800
#define PLIC_BASE_ADDRESS 0x04000000

Expand Down
6 changes: 6 additions & 0 deletions examples/targets/carfield/libs/carfield_lib/include/cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
#include "pulp.h"
#include "bench/bench.h"
#include "pulp_nn/pulp_nn_kernels.h"

typedef float16 fp16;
typedef fp16 v2f16 __attribute__((vector_size (4)));

#endif


Expand Down Expand Up @@ -76,6 +80,8 @@ void pulp_nn_hoparallel_conv2d_wrapper(MatchCtx* ctx);

void pulp_nn_add_wrapper(MatchCtx* ctx);

void pulp_nn_dense_fp16_wrapper(MatchCtx* ctx);

void pulp_nn_wrapper(MatchCtx* ctx);

#endif // CAR_LIB_CLUSTER_H
13 changes: 11 additions & 2 deletions examples/targets/carfield/libs/carfield_lib/src/carfield.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,19 @@ static dif_rv_plic_t plic0;
void carfield_init_plic() {
// Reset PLIC
dif_rv_plic_reset(&plic0);

// Set global interrupt enable in CVA6 csr
asm volatile("csrw mstatus, %0\n" : : "r"(GLOBAL_IRQ_ENABLE));
unsigned long mstatus;
asm volatile ("csrr %0, mstatus" : "=r"(mstatus));
mstatus |= GLOBAL_IRQ_ENABLE;
asm volatile ("csrw mstatus, %0" :: "r"(mstatus));

// Set external interrupt enable in CVA6 csr
asm volatile("csrw mie, %0\n" : : "r"(EXTERNAL_IRQ_ENABLE));
unsigned long mie;
asm volatile ("csrr %0, mie" : "=r"(mie));
mie |= EXTERNAL_IRQ_ENABLE;
asm volatile ("csrw mie, %0" :: "r"(mie));

// Setup PLIC
mmio_region_t plic_base_addr = mmio_region_from_addr(PLIC_BASE_ADDRESS);
dif_result_t t = dif_rv_plic_init(plic_base_addr, &plic0);
Expand Down
56 changes: 53 additions & 3 deletions examples/targets/carfield/libs/carfield_lib/src/cluster.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
#include "carfield_lib/mbox.h"
#include "carfield_lib/utils.h"

#include "pulp_nn/pulp_nn_kernels.h"
#include "pulp_nn_fp16/pulp_nn_kernels_fp16.h"

//#define CLUSTER_LIB_DEBUG
#define DEBUG_CALLOC_L1_SCRATCHPAD 0
#define DEBUG_BLOCKING_DMA 0
Expand Down Expand Up @@ -58,6 +61,11 @@ void cluster_sync_cores(MatchCtx* ctx)

void cluster_lib_init(MatchCtx* ctx)
{
#ifdef CLUSTER_LIB_DEBUG
for (int i = 0; i < 20000; i++) {
asm volatile("fence rw,rw":::"memory");
}
#endif
dma_transfer_ = dma_transfer_create();
#ifdef CLUSTER_LIB_DEBUG
mini_printf("[PULP] Yo! Cluster is alive! DMA counter is %d\r\n", dma_transfer_);
Expand Down Expand Up @@ -719,6 +727,36 @@ void pulp_nn_add_wrapper(MatchCtx* ctx){
);
}


// Runs the FP16 linear (fully-connected) kernel on the tensors held in ctx.
//
// Tensor layout as used here:
//   tensors[0]               -> activations
//   tensors[1]               -> weights
//   tensors[2]               -> bias, only when exactly four tensors are present
//   tensors[num_tensors - 1] -> output
//
// The channel counts come from the `.size` field of tile slot
// L1_SCRATCHPAD*2+1 of the input and output tensors.
void pulp_nn_dense_fp16_wrapper(MatchCtx* ctx) {
    MatchTensor* tensors = ctx->tensors->tensors;
    int num_tensors = ctx->tensors->num_tensors;
    int out_ch = tensors[num_tensors-1].tiles[L1_SCRATCHPAD*2+1].size;
    int inp_ch = tensors[0].tiles[L1_SCRATCHPAD*2+1].size;

    // Only treat tensors[2] as the bias when it is a distinct tensor sitting
    // between the weights and the output. With three tensors, tensors[2] is
    // the output itself, so it must not be handed to the kernel as bias; with
    // more than four the bias stays NULL, exactly as before.
    // NOTE(review): assumes pulp_nn_linear_fp16 accepts a NULL bias, which the
    // previous code already relied on for num_tensors > 4 - confirm.
    float16* bias = (num_tensors == 4) ? (float16*)tensors[2].pt : (float16*)NULL;

#ifdef CLUSTER_LIB_DEBUG
    // Print the tile shape once, from core 0 only.
    if(rt_core_id() == 0) {
        mini_printf("[PULP][KER] pulp_nn_linear_fp16: ");
        mini_printf("Out. tile (%d,) | ", out_ch);
        mini_printf("Inp. tile (%d,)\r\n", inp_ch);
    }
#endif

    pulp_nn_linear_fp16(
        (float16*)tensors[0].pt,             // acts pt
        (float16*)tensors[1].pt,             // weights pt
        (float16*)tensors[num_tensors-1].pt, // output pt
        bias,                                // bias pt (may be NULL)
        // dims
        inp_ch,
        out_ch
    );
}


void pulp_nn_wrapper(MatchCtx* ctx){

switch(ctx->pattern_name){
Expand All @@ -728,9 +766,9 @@ void pulp_nn_wrapper(MatchCtx* ctx){
case conv2d:
pulp_nn_hoparallel_conv2d_wrapper(ctx);
break;
case dense_out:
pulp_nn_dense_out_int_wrapper(ctx);
break;
//case dense_out:
// pulp_nn_dense_out_int_wrapper(ctx);
// break;
// case pulp_nn_dw_conv2d_less_4_pattern:
// pi_team_offload_preset(pulp_nn_dw_conv2d_less_4_wrapper, ctx);
// break;
Expand All @@ -743,6 +781,8 @@ void pulp_nn_wrapper(MatchCtx* ctx){
case add_requant:
pulp_nn_add_wrapper(ctx);
break;
case dense_fp16:
pulp_nn_dense_fp16_wrapper(ctx);
default:
break;
}
Expand Down Expand Up @@ -807,4 +847,14 @@ uint32_t cluster_timer_stop() {
}



// Weak definition of the half-to-double widening helper.
// The value is first widened to single precision with the `fcvt.s.h`
// instruction, then the C cast widens that float to double.
// Being weak, any strong definition of the same symbol takes precedence
// at link time.
double __attribute__((weak)) __extendhfdf2(float16 val)
{
    float widened;

    __asm__ __volatile__ (
        "fcvt.s.h %0, %1"
        : "=f"(widened)   // output: single-precision result
        : "f"(val)        // input: half-precision operand
    );

    return (double) widened;
}



#endif
Loading