Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 39 additions & 4 deletions arch/arm64/lib/copy_from_user.S
Original file line number Diff line number Diff line change
Expand Up @@ -63,25 +63,60 @@
USER_CPY(9996f, 0, cpyfert [\dst]!, [\src]!, \count!)
.endm

/*
 * ldpc1 - load the register pair \reg1/\reg2 from \ptr through user_ldp.
 * 9997f is handed to user_ldp as the label argument, i.e. the exception
 * fixup entry defined further down in this file. \val is forwarded
 * unchanged; the copy_template.S call sites pass #32.
 * NOTE(review): the trailing #16 is presumably the size of one register of
 * the pair (a capability register) - confirm against user_ldp's definition.
 */
.macro ldpc1 reg1, reg2, ptr, val
user_ldp 9997f, \reg1, \reg2, \ptr, \val, #16
.endm

/*
 * stpc1 - store the register pair \reg1/\reg2 to [\ptr] with a plain stp,
 * then post-increment \ptr by \val. No fixup label is attached to this
 * store.
 */
.macro stpc1 reg1, reg2, ptr, val
stp \reg1, \reg2, [\ptr], \val
.endm

/*
 * Register aliases local to this routine; the remaining aliases (dst, dstx,
 * src, count, tmp1w, ...) are defined by copy_template.S.
 *
 *   end   - x0 + x2, i.e. one past the last destination byte
 *   dstin - destination address as passed in x0, kept for the fixup path
 *   srcin - source pointer as passed in; declared through req_reg_pcuabi
 *           with both c15 and x15
 *           (NOTE(review): presumably the C view is selected for PCuABI
 *           builds - confirm against the macro definition)
 *
 * Returns in x0 the number of bytes NOT copied: 0 when the template runs to
 * completion, otherwise end - dstx as computed by the fixup code below.
 */
end	.req	x5
dstin	.req	x6
req_reg_pcuabi srcin, c15, x15
SYM_FUNC_START(COPY_FUNC_NAME)
	add	end, x0, x2
	mov	dstin, x0
#ifdef CONFIG_CHERI_PURECAP_UABI
	.arch morello+c64
	bx	#4
	/*
	 * Having switched to C64, argumentless RET is equivalent to RET CLR.
	 * Because we have been called from A64, only LR is set. We therefore
	 * set CLR to a valid capability, derived from PCC (as if we had been
	 * called from C64). Conveniently this will also automatically switch
	 * us back to A64 when returning (as the LSB of LR should be unset).
	 */
	cvtp	clr, lr
	/*
	 * Accessing memory via X registers in C64 requires using
	 * alternate-base loads and stores; unfortunately most loads and stores
	 * used in copy_template.S do not have an alternate-base counterpart.
	 * The most straightforward solution is to access memory via C
	 * registers only. We therefore need to create a valid capability for
	 * the kernel buffer too, which is done by deriving it from DDC. Since
	 * X-based accesses are validated against DDC, this is functionally
	 * equivalent.
	 */
	cvtd	c0, x0
	mov	srcin, c1
#else
	mov	srcin, x1
#endif
#include "copy_template.S"
	mov	x0, #0				// Nothing to copy
	ret

	// Exception fixups
	//
	// Only the scalar view of the destination (dstx) is used for the
	// arithmetic and comparisons below, because dst is declared through
	// req_reg_pcuabi with c0 as one of its candidates. Each numeric label
	// is defined exactly once so that 9997f/9998f resolve unambiguously.
9996:	b.cs	9997f
	// Registers are in Option A format
	add	dstx, dstx, count
9997:	cmp	dstx, dstin
	b.ne	9998f
	// Before being absolutely sure we couldn't copy anything, try harder
USER(9998f, ldtrb tmp1w, [srcin])
	strb	tmp1w, [dst], #1
9998:	sub	x0, end, dstx			// bytes not copied
	ret
SYM_FUNC_END(COPY_FUNC_NAME)
EXPORT_SYMBOL(COPY_FUNC_NAME)
90 changes: 83 additions & 7 deletions arch/arm64/lib/copy_template.S
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,16 @@
* x0 - dest
* x1 - src
* x2 - n
* Returns:
* x0 - dest
*/
dstin .req x0
src .req x1
req_reg_pcuabi dst, c0, x0
dstx .req x0
req_reg_pcuabi src, c1, x1
srcx .req x1
count .req x2
tmp1 .req x3
tmp1w .req w3
tmp2 .req x4
tmp2w .req w4
dst .req x6

A_l .req x7
A_h .req x8
Expand All @@ -39,7 +38,15 @@ C_h .req x12
D_l .req x13
D_h .req x14

mov dst, dstin
#ifdef COPY_CAPTAGS
tmp1c .req c3
tmp2c .req c4

Ac_l .req c7
Ac_h .req c8
Bc_l .req c9
Bc_h .req c10
#endif

#ifdef CONFIG_AS_HAS_MOPS
alternative_if_not ARM64_HAS_MOPS
Expand All @@ -54,7 +61,7 @@ alternative_else_nop_endif
/* When the memory length is less than 16, the accesses are not aligned. */
b.lo .Ltiny15

neg tmp2, src
neg tmp2, srcx
ands tmp2, tmp2, #15/* Bytes to reach alignment. */
b.eq .LSrcAligned
sub count, count, tmp2
Expand All @@ -81,6 +88,11 @@ alternative_else_nop_endif
str1 tmp1, dst, #8

.LSrcAligned:
#ifdef COPY_CAPTAGS
/* src now 16-byte aligned, copy capability tags if dst also aligned */
tst dstx, #15
b.eq .LSrcAligned_cpycaps
#endif
cmp count, #64
b.ge .Lcpy_over64
/*
Expand Down Expand Up @@ -188,4 +200,68 @@ alternative_else_nop_endif

tst count, #0x3f
b.ne .Ltail63
#ifdef COPY_CAPTAGS
b .Lexitfunc

/*
* The .L*_cpycaps instruction sequences below are copies of the
* sequences above.
* The only functional difference is that they use capability
* loads/stores, such that capability tags are copied from the source to
* the destination. For that reason, they require both src and dst to be
* 16-byte aligned.
* Because C registers are twice as big as X registers, we only need
* half the L/S instructions to transfer the same amount of data.
*/
.LSrcAligned_cpycaps:
cmp count, #64
b.ge .Lcpy_over64_cpycaps

.Ltail63_cpycaps:
ands tmp1, count, #0x30
b.eq .Ltiny15
cmp tmp1w, #0x20
b.eq 1f
b.lt 2f
ldr1 tmp2c, src, #16
str1 tmp2c, dst, #16
1:
ldr1 tmp2c, src, #16
str1 tmp2c, dst, #16
2:
ldr1 tmp2c, src, #16
str1 tmp2c, dst, #16
b .Ltiny15

.Lcpy_over64_cpycaps:
subs count, count, #128
b.ge .Lcpy_body_large_cpycaps

ldpc1 Ac_l, Ac_h, src, #32
stpc1 Ac_l, Ac_h, dst, #32
ldpc1 Bc_l, Bc_h, src, #32
stpc1 Bc_l, Bc_h, dst, #32

tst count, #0x3f
b.ne .Ltail63_cpycaps
b .Lexitfunc

.p2align L1_CACHE_SHIFT
.Lcpy_body_large_cpycaps:
ldpc1 Ac_l, Ac_h, src, #32
ldpc1 Bc_l, Bc_h, src, #32
1:
/* attempt to keep 64-byte blocks of loads and stores interlaced */
stpc1 Ac_l, Ac_h, dst, #32
ldpc1 Ac_l, Ac_h, src, #32
stpc1 Bc_l, Bc_h, dst, #32
ldpc1 Bc_l, Bc_h, src, #32
subs count, count, #64
b.ge 1b
stpc1 Ac_l, Ac_h, dst, #32
stpc1 Bc_l, Bc_h, dst, #32

tst count, #0x3f
b.ne .Ltail63_cpycaps
#endif /* COPY_CAPTAGS */
.Lexitfunc:
27 changes: 23 additions & 4 deletions arch/arm64/lib/copy_to_user.S
Original file line number Diff line number Diff line change
Expand Up @@ -62,26 +62,45 @@
USER_CPY(9996f, 1, cpyfewt [\dst]!, [\src]!, \count!)
.endm

/*
 * ldpc1 - load the register pair \reg1/\reg2 from [\ptr] with a plain ldp,
 * then post-increment \ptr by \val. No fixup label is attached to this
 * load.
 */
.macro ldpc1 reg1, reg2, ptr, val
ldp \reg1, \reg2, [\ptr], \val
.endm

/*
 * stpc1 - store the register pair \reg1/\reg2 to \ptr through user_stp.
 * 9997f is handed to user_stp as the label argument, i.e. the exception
 * fixup entry defined further down in this file. \val is forwarded
 * unchanged; the copy_template.S call sites pass #32.
 * NOTE(review): the trailing #16 is presumably the size of one register of
 * the pair (a capability register) - confirm against user_stp's definition.
 */
.macro stpc1 reg1, reg2, ptr, val
user_stp 9997f, \reg1, \reg2, \ptr, \val, #16
.endm

/*
 * Register aliases local to this routine; the remaining aliases (dst, dstx,
 * src, count, tmp1w, ...) are defined by copy_template.S.
 *
 *   end   - x0 + x2, i.e. one past the last destination byte
 *   dstin - destination address as passed in x0, kept for the fixup path
 *   srcin - source pointer as passed in; declared through req_reg_pcuabi
 *           with both c15 and x15
 *           (NOTE(review): presumably the C view is selected for PCuABI
 *           builds - confirm against the macro definition)
 *
 * Returns in x0 the number of bytes NOT copied: 0 when the template runs to
 * completion, otherwise end - dstx as computed by the fixup code below.
 */
end	.req	x5
dstin	.req	x6
req_reg_pcuabi srcin, c15, x15
SYM_FUNC_START(COPY_FUNC_NAME)
	add	end, x0, x2
	mov	dstin, x0
#ifdef CONFIG_CHERI_PURECAP_UABI
	.arch morello+c64
	bx	#4
	/* See comments in copy_from_user.S */
	cvtp	clr, lr
	cvtd	c1, x1
	mov	srcin, c1
#else
	mov	srcin, x1
#endif
#include "copy_template.S"
	mov	x0, #0
	ret

	// Exception fixups
	//
	// Only the scalar view of the destination (dstx) is used for the
	// arithmetic and comparisons below, because dst is declared through
	// req_reg_pcuabi with c0 as one of its candidates. Each numeric label
	// is defined exactly once so that 9997f/9998f resolve unambiguously.
9996:	b.cs	9997f
	// Registers are in Option A format
	add	dstx, dstx, count
9997:	cmp	dstx, dstin
	b.ne	9998f
	// Before being absolutely sure we couldn't copy anything, try harder
	ldrb	tmp1w, [srcin]
USER(9998f, sttrb tmp1w, [dst])
	add	dst, dst, #1
9998:	sub	x0, end, dstx			// bytes not copied
	ret
SYM_FUNC_END(COPY_FUNC_NAME)
EXPORT_SYMBOL(COPY_FUNC_NAME)
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@
#ifndef R_MORELLO_RELATIVE
#define R_MORELLO_RELATIVE 59395
#endif
#ifndef R_MORELLO_FUNC_RELATIVE
#define R_MORELLO_FUNC_RELATIVE 59400
#endif

struct cap_reloc {
size_t capability_location;
Expand Down Expand Up @@ -181,7 +184,8 @@ void __morello_process_dynamic_relocs(void *auxv)
for (reloc = rela_dyn_start; reloc < rela_dyn_end; ++reloc) {
uintptr_t *reloc_addr, value;

if (reloc->r_info != R_MORELLO_RELATIVE)
if (reloc->r_info != R_MORELLO_RELATIVE &&
reloc->r_info != R_MORELLO_FUNC_RELATIVE)
continue;
reloc_addr = (uintptr_t *)cheri_address_set(cap_rw, reloc->r_offset);
value = morello_relative(0, cap_rx, cap_rw, reloc, reloc_addr);
Expand Down
Loading