From 8036b2c943466960448865a34e099d3613d1f789 Mon Sep 17 00:00:00 2001 From: Kevin Brodsky Date: Wed, 8 Sep 2021 11:45:14 +0100 Subject: [PATCH 1/4] arm64: morello: Implement raw_copy_{from,to}_user_with_captags() This patch implements the low-level tag-preserving uaccess routines. Most of the existing logic for __arch_copy_{from,to}_user() is reused to implement the new __arch_copy_{from,to}_user_with_captags(). copy_template.S is extended to copy capability tags for the *_with_captags() variants (controlled by the COPY_CAPTAGS macro). The approach is the same as a generic tag-preserving memcpy() implementation (i.e. copy tags whenever possible, which requires at least the source and destination addresses to be mutually aligned). The existing {ldr,str}1 assembler macros can be reused as-is to operate on capabilities; unfortunately, this is not the case for {ldp,stp}1, because they need to know the register size (8 for X, 16 for C). user_{ldp,stp} are extended and a new {ldp,stp}c1 pair is introduced accordingly. Note: the implementation of __arch_copy_{from,to}_user() remains unchanged (i.e. they never preserve tags). Signed-off-by: Kevin Brodsky --- arch/arm64/lib/copy_from_user.S | 8 ++++ arch/arm64/lib/copy_template.S | 79 +++++++++++++++++++++++++++++++++ arch/arm64/lib/copy_to_user.S | 8 ++++ 3 files changed, 95 insertions(+) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 204b2e72966790..42c95939afbded 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -63,6 +63,14 @@ USER_CPY(9996f, 0, cpyfert [\dst]!, [\src]!, \count!) 
.endm + .macro ldpc1 reg1, reg2, ptr, val + user_ldp 9997f, \reg1, \reg2, \ptr, \val, #16 + .endm + + .macro stpc1 reg1, reg2, ptr, val + stp \reg1, \reg2, [\ptr], \val + .endm + end .req x5 srcin .req x15 SYM_FUNC_START(COPY_FUNC_NAME) diff --git a/arch/arm64/lib/copy_template.S b/arch/arm64/lib/copy_template.S index 7f2f5a0e2fb9f0..2002f069c0f5de 100644 --- a/arch/arm64/lib/copy_template.S +++ b/arch/arm64/lib/copy_template.S @@ -39,6 +39,16 @@ C_h .req x12 D_l .req x13 D_h .req x14 +#ifdef COPY_CAPTAGS +tmp1c .req c3 +tmp2c .req c4 + +Ac_l .req c7 +Ac_h .req c8 +Bc_l .req c9 +Bc_h .req c10 +#endif + mov dst, dstin #ifdef CONFIG_AS_HAS_MOPS @@ -81,6 +91,11 @@ alternative_else_nop_endif str1 tmp1, dst, #8 .LSrcAligned: +#ifdef COPY_CAPTAGS + /* src now 16-byte aligned, copy capability tags if dst also aligned */ + tst dst, #15 + b.eq .LSrcAligned_cpycaps +#endif cmp count, #64 b.ge .Lcpy_over64 /* @@ -188,4 +203,68 @@ alternative_else_nop_endif tst count, #0x3f b.ne .Ltail63 +#ifdef COPY_CAPTAGS + b .Lexitfunc + + /* + * The .L*_cpycaps instruction sequences below are copies of the + * sequences above. + * The only functional difference is that they use capability + * loads/stores, such that capability tags are copied from the source to + * the destination. For that reason, they require both src and dst to be + * 16-byte aligned. + * Because C registers are twice as big as X registers, we only need + * half the L/S instructions to transfer the same amount of data. 
+ */ +.LSrcAligned_cpycaps: + cmp count, #64 + b.ge .Lcpy_over64_cpycaps + +.Ltail63_cpycaps: + ands tmp1, count, #0x30 + b.eq .Ltiny15 + cmp tmp1w, #0x20 + b.eq 1f + b.lt 2f + ldr1 tmp2c, src, #16 + str1 tmp2c, dst, #16 +1: + ldr1 tmp2c, src, #16 + str1 tmp2c, dst, #16 +2: + ldr1 tmp2c, src, #16 + str1 tmp2c, dst, #16 + b .Ltiny15 + +.Lcpy_over64_cpycaps: + subs count, count, #128 + b.ge .Lcpy_body_large_cpycaps + + ldpc1 Ac_l, Ac_h, src, #32 + stpc1 Ac_l, Ac_h, dst, #32 + ldpc1 Bc_l, Bc_h, src, #32 + stpc1 Bc_l, Bc_h, dst, #32 + + tst count, #0x3f + b.ne .Ltail63_cpycaps + b .Lexitfunc + + .p2align L1_CACHE_SHIFT +.Lcpy_body_large_cpycaps: + ldpc1 Ac_l, Ac_h, src, #32 + ldpc1 Bc_l, Bc_h, src, #32 +1: + /* attempt to keep 64-byte blocks of loads and stores interlaced */ + stpc1 Ac_l, Ac_h, dst, #32 + ldpc1 Ac_l, Ac_h, src, #32 + stpc1 Bc_l, Bc_h, dst, #32 + ldpc1 Bc_l, Bc_h, src, #32 + subs count, count, #64 + b.ge 1b + stpc1 Ac_l, Ac_h, dst, #32 + stpc1 Bc_l, Bc_h, dst, #32 + + tst count, #0x3f + b.ne .Ltail63_cpycaps +#endif /* COPY_CAPTAGS */ .Lexitfunc: diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index f4ac5fec80fe49..746758270c6bfc 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -62,6 +62,14 @@ USER_CPY(9996f, 1, cpyfewt [\dst]!, [\src]!, \count!) .endm + .macro ldpc1 reg1, reg2, ptr, val + ldp \reg1, \reg2, [\ptr], \val + .endm + + .macro stpc1 reg1, reg2, ptr, val + user_stp 9997f, \reg1, \reg2, \ptr, \val, #16 + .endm + end .req x5 srcin .req x15 SYM_FUNC_START(COPY_FUNC_NAME) From 0543360d71d89689a28f677d5bb5b90b04ef89ba Mon Sep 17 00:00:00 2001 From: Kevin Brodsky Date: Thu, 19 Oct 2023 09:32:19 +0100 Subject: [PATCH 2/4] arm64: lib: Simplify copy_*_user register allocation copy_template currently copies the destination pointer to x6 then operates on that copy, while it directly operates on the source pointer (x1). 
In both cases a copy of the original pointer is required for the final fixup in copy_*_user, but not in copy_template itself. Make things a little easier to follow by saving both pointers in copy_*_user, letting copy_template operate on the original pointers (x0 and x1) directly. While at it, remove the "Returns" comment in copy_template, which is irrelevant. Signed-off-by: Kevin Brodsky --- arch/arm64/lib/copy_from_user.S | 2 ++ arch/arm64/lib/copy_template.S | 7 +------ arch/arm64/lib/copy_to_user.S | 2 ++ 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 42c95939afbded..50e2d50097f91e 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -72,9 +72,11 @@ .endm end .req x5 +dstin .req x6 srcin .req x15 SYM_FUNC_START(COPY_FUNC_NAME) add end, x0, x2 + mov dstin, x0 mov srcin, x1 #include "copy_template.S" mov x0, #0 // Nothing to copy diff --git a/arch/arm64/lib/copy_template.S b/arch/arm64/lib/copy_template.S index 2002f069c0f5de..d0fe03a365038c 100644 --- a/arch/arm64/lib/copy_template.S +++ b/arch/arm64/lib/copy_template.S @@ -18,17 +18,14 @@ * x0 - dest * x1 - src * x2 - n - * Returns: - * x0 - dest */ -dstin .req x0 +dst .req x0 src .req x1 count .req x2 tmp1 .req x3 tmp1w .req w3 tmp2 .req x4 tmp2w .req w4 -dst .req x6 A_l .req x7 A_h .req x8 @@ -49,8 +46,6 @@ Bc_l .req c9 Bc_h .req c10 #endif - mov dst, dstin - #ifdef CONFIG_AS_HAS_MOPS alternative_if_not ARM64_HAS_MOPS b .Lno_mops diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 746758270c6bfc..b62dc8ee6af4a2 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -71,9 +71,11 @@ .endm end .req x5 +dstin .req x6 srcin .req x15 SYM_FUNC_START(COPY_FUNC_NAME) add end, x0, x2 + mov dstin, x0 mov srcin, x1 #include "copy_template.S" mov x0, #0 From e0903e8b17f236e5d29d2c4413ce352fae0cfc0d Mon Sep 17 00:00:00 2001 From: Kevin Brodsky Date: 
Tue, 24 Oct 2023 12:41:55 +0100 Subject: [PATCH 3/4] arm64: lib: Switch to capability-based copy_*_user in PCuABI We have already amended {get,put}_user so that they access user memory directly via the user capability in PCuABI; it is now time to convert copy_*_user too. As a result, the copy will be aborted if the user capability is unsuitable to perform the access (potentially partway, in case the tail of the targeted region is out of bounds). __arch_copy_{from,to}_user present an additional challenge in that they are implemented fully in assembly. Fortunately, the registers holding the source and destination pointers are mostly used as base registers for load/store instructions. After switching to C64, such instructions operate on C registers instead of X, so it becomes simply a matter of modifying the register aliases in PCuABI; the req_reg_pcuabi macro is introduced for that purpose. Explicit ADD instructions are also used in the user_{ldst,ldp,stp} helpers; those are unproblematic as they can operate on both X and C registers. In the few situations where the pointers are being somehow inspected, we keep operating on their address only, by using the corresponding X register; srcx and dstx are introduced for that purpose. This is necessary in some cases due to the instruction simply not accepting C registers (e.g. TST), while in others it is rather a matter of convenience, as it means we don't need to convert additional register aliases to capabilities (CMP, SUB). 
Signed-off-by: Kevin Brodsky --- arch/arm64/lib/copy_from_user.S | 33 +++++++++++++++++++++++++++++---- arch/arm64/lib/copy_template.S | 10 ++++++---- arch/arm64/lib/copy_to_user.S | 17 +++++++++++++---- 3 files changed, 48 insertions(+), 12 deletions(-) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 50e2d50097f91e..351e853fd28dde 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -73,11 +73,36 @@ end .req x5 dstin .req x6 -srcin .req x15 +req_reg_pcuabi srcin, c15, x15 SYM_FUNC_START(COPY_FUNC_NAME) add end, x0, x2 mov dstin, x0 +#ifdef CONFIG_CHERI_PURECAP_UABI +.arch morello+c64 + bx #4 + /* + * Having switched to C64, argumentless RET is equivalent to RET CLR. + * Because we have been called from A64, only LR is set. We therefore + * set CLR to a valid capability, derived from PCC (as if we had been + * called from C64). Conveniently this will also automatically switch + * us back to A64 when returning (as the LSB of LR should be unset). + */ + cvtp clr, lr + /* + * Accessing memory via X registers in C64 requires using + * alternate-base loads and stores; unfortunately most loads and stores + * used in copy_template.S do not have an alternate-base counterpart. + * The most straightforward solution is to access memory via C + * registers only. We therefore need to create a valid capability for + * the kernel buffer too, which is done by deriving it from DDC. Since + * X-based accesses are validated against DDC, this is functionally + * equivalent. 
+ */ + cvtd c0, x0 + mov srcin, c1 +#else mov srcin, x1 +#endif #include "copy_template.S" mov x0, #0 // Nothing to copy ret @@ -85,13 +110,13 @@ SYM_FUNC_START(COPY_FUNC_NAME) // Exception fixups 9996: b.cs 9997f // Registers are in Option A format - add dst, dst, count -9997: cmp dst, dstin + add dstx, dstx, count +9997: cmp dstx, dstin b.ne 9998f // Before being absolutely sure we couldn't copy anything, try harder USER(9998f, ldtrb tmp1w, [srcin]) strb tmp1w, [dst], #1 -9998: sub x0, end, dst // bytes not copied +9998: sub x0, end, dstx // bytes not copied ret SYM_FUNC_END(COPY_FUNC_NAME) EXPORT_SYMBOL(COPY_FUNC_NAME) diff --git a/arch/arm64/lib/copy_template.S b/arch/arm64/lib/copy_template.S index d0fe03a365038c..459983994cfb66 100644 --- a/arch/arm64/lib/copy_template.S +++ b/arch/arm64/lib/copy_template.S @@ -19,8 +19,10 @@ * x1 - src * x2 - n */ -dst .req x0 -src .req x1 +req_reg_pcuabi dst, c0, x0 +dstx .req x0 +req_reg_pcuabi src, c1, x1 +srcx .req x1 count .req x2 tmp1 .req x3 tmp1w .req w3 @@ -59,7 +61,7 @@ alternative_else_nop_endif /*When memory length is less than 16, the accessed are not aligned.*/ b.lo .Ltiny15 - neg tmp2, src + neg tmp2, srcx ands tmp2, tmp2, #15/* Bytes to reach alignment. 
*/ b.eq .LSrcAligned sub count, count, tmp2 @@ -88,7 +90,7 @@ alternative_else_nop_endif .LSrcAligned: #ifdef COPY_CAPTAGS /* src now 16-byte aligned, copy capability tags if dst also aligned */ - tst dst, #15 + tst dstx, #15 b.eq .LSrcAligned_cpycaps #endif cmp count, #64 diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index b62dc8ee6af4a2..32e3a6b8fe94db 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -72,11 +72,20 @@ end .req x5 dstin .req x6 -srcin .req x15 +req_reg_pcuabi srcin, c15, x15 SYM_FUNC_START(COPY_FUNC_NAME) add end, x0, x2 mov dstin, x0 +#ifdef CONFIG_CHERI_PURECAP_UABI +.arch morello+c64 + bx #4 + /* See comments in copy_from_user.S */ + cvtp clr, lr + cvtd c1, x1 + mov srcin, c1 +#else mov srcin, x1 +#endif #include "copy_template.S" mov x0, #0 ret @@ -84,14 +93,14 @@ SYM_FUNC_START(COPY_FUNC_NAME) // Exception fixups 9996: b.cs 9997f // Registers are in Option A format - add dst, dst, count -9997: cmp dst, dstin + add dstx, dstx, count +9997: cmp dstx, dstin b.ne 9998f // Before being absolutely sure we couldn't copy anything, try harder ldrb tmp1w, [srcin] USER(9998f, sttrb tmp1w, [dst]) add dst, dst, #1 -9998: sub x0, end, dst // bytes not copied +9998: sub x0, end, dstx // bytes not copied ret SYM_FUNC_END(COPY_FUNC_NAME) EXPORT_SYMBOL(COPY_FUNC_NAME) From 5f12092b366df8d68f87e201ebbf65de13402a3f Mon Sep 17 00:00:00 2001 From: Jessica Clarke Date: Fri, 6 Mar 2026 17:48:50 +0100 Subject: [PATCH 4/4] kselftests/arm64: morello: Support R_MORELLO_FUNC_RELATIVE Morello LLVM allows opting into distinguishing between function pointers (which may be interposed on by a compartmentalisation runtime) and code pointers (which always directly point to specific instructions, for cases like C++ landing pads and GNU C indirect goto). 
Supporting both relocations (which are equivalent for kselftest's non-compartmentalised purposes) will allow for Morello LLVM to make this opt-in behaviour always-on, reducing the number of ABI combinations. Signed-off-by: Jessica Clarke --- .../selftests/arm64/morello/freestanding_init_globals.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/morello/freestanding_init_globals.c b/tools/testing/selftests/arm64/morello/freestanding_init_globals.c index 12eb0af97887b6..2fa2aa643fd60d 100644 --- a/tools/testing/selftests/arm64/morello/freestanding_init_globals.c +++ b/tools/testing/selftests/arm64/morello/freestanding_init_globals.c @@ -38,6 +38,9 @@ #ifndef R_MORELLO_RELATIVE #define R_MORELLO_RELATIVE 59395 #endif +#ifndef R_MORELLO_FUNC_RELATIVE +#define R_MORELLO_FUNC_RELATIVE 59400 +#endif struct cap_reloc { size_t capability_location; @@ -181,7 +184,8 @@ void __morello_process_dynamic_relocs(void *auxv) for (reloc = rela_dyn_start; reloc < rela_dyn_end; ++reloc) { uintptr_t *reloc_addr, value; - if (reloc->r_info != R_MORELLO_RELATIVE) + if (reloc->r_info != R_MORELLO_RELATIVE && + reloc->r_info != R_MORELLO_FUNC_RELATIVE) continue; reloc_addr = (uintptr_t *)cheri_address_set(cap_rw, reloc->r_offset); value = morello_relative(0, cap_rx, cap_rw, reloc, reloc_addr);