Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ jobs:
platform: "arm64"
- runner: "ubuntu-22.04-arm"
platform: "aarch64"
- runner: "windows-11-arm"
platform: "ARM64"
steps:
- name: Checkout
uses: actions/checkout@v4
Expand Down Expand Up @@ -141,7 +143,7 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v4
- uses: uraimo/run-on-arch-action@v2
- uses: uraimo/run-on-arch-action@v3
with:
arch: ${{matrix.arch}}
distro: alpine_latest
Expand All @@ -162,7 +164,7 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v4
- uses: uraimo/run-on-arch-action@v2
- uses: uraimo/run-on-arch-action@v3
with:
arch: ${{matrix.arch}}
distro: alpine_latest
Expand Down
3 changes: 2 additions & 1 deletion cmake/test-arch.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
////////////////////////////////////////////////////////////////////////////////

// ARM 64-Bit
#if defined(__aarch64__)
#if defined(__aarch64__) \
|| defined(_M_ARM64)
#error ##arch=arm64##

// ARM 32-Bit
Expand Down
14 changes: 4 additions & 10 deletions lib/arch/neon64/codec.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,7 @@
#include "config.h"
#include "../../env.h"

#ifdef __aarch64__
# if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && HAVE_NEON64
# define BASE64_USE_NEON64
# endif
#endif

#ifdef BASE64_USE_NEON64
#if HAVE_NEON64
#include <arm_neon.h>

// Only enable inline assembly on supported compilers.
Expand Down Expand Up @@ -66,7 +60,7 @@ load_64byte_table (const uint8_t *p)
# include "enc_loop.c"
#endif

#endif // BASE64_USE_NEON64
#endif // HAVE_NEON64

// Stride size is so large on these NEON 64-bit functions
// (48 bytes encode, 64 bytes decode) that we inline the
Expand All @@ -75,7 +69,7 @@ load_64byte_table (const uint8_t *p)
void
base64_stream_encode_neon64 BASE64_ENC_PARAMS
{
#ifdef BASE64_USE_NEON64
#if HAVE_NEON64
#include "../generic/enc_head.c"
enc_loop_neon64(&s, &slen, &o, &olen);
enc_loop_generic_64(&s, &slen, &o, &olen);
Expand All @@ -88,7 +82,7 @@ base64_stream_encode_neon64 BASE64_ENC_PARAMS
int
base64_stream_decode_neon64 BASE64_DEC_PARAMS
{
#ifdef BASE64_USE_NEON64
#if HAVE_NEON64
#include "../generic/dec_head.c"
dec_loop_neon64(&s, &slen, &o, &olen);
dec_loop_generic_32(&s, &slen, &o, &olen);
Expand Down
14 changes: 7 additions & 7 deletions lib/arch/neon64/dec_loop.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,20 +100,20 @@ dec_loop_neon64 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)

// Check for invalid input, any value larger than 63:
const uint8x16_t classified
= vcgtq_u8(str.val[0], vdupq_n_u8(63))
| vcgtq_u8(str.val[1], vdupq_n_u8(63))
| vcgtq_u8(str.val[2], vdupq_n_u8(63))
| vcgtq_u8(str.val[3], vdupq_n_u8(63));
= vorrq_u8(
vorrq_u8(vcgtq_u8(str.val[0], vdupq_n_u8(63)), vcgtq_u8(str.val[1], vdupq_n_u8(63))),
vorrq_u8(vcgtq_u8(str.val[2], vdupq_n_u8(63)), vcgtq_u8(str.val[3], vdupq_n_u8(63)))
);

// Check that all bits are zero:
if (vmaxvq_u8(classified) != 0U) {
break;
}

// Compress four bytes into three:
dec.val[0] = vshlq_n_u8(str.val[0], 2) | vshrq_n_u8(str.val[1], 4);
dec.val[1] = vshlq_n_u8(str.val[1], 4) | vshrq_n_u8(str.val[2], 2);
dec.val[2] = vshlq_n_u8(str.val[2], 6) | str.val[3];
dec.val[0] = vorrq_u8(vshlq_n_u8(str.val[0], 2), vshrq_n_u8(str.val[1], 4));
dec.val[1] = vorrq_u8(vshlq_n_u8(str.val[1], 4), vshrq_n_u8(str.val[2], 2));
dec.val[2] = vorrq_u8(vshlq_n_u8(str.val[2], 6), str.val[3]);

// Interleave and store decoded result:
vst3q_u8((uint8_t *) *o, dec);
Expand Down
4 changes: 2 additions & 2 deletions lib/codec_choose.c
Original file line number Diff line number Diff line change
Expand Up @@ -163,13 +163,13 @@ codec_choose_forced (struct codec *codec, int flags)
static bool
codec_choose_arm (struct codec *codec)
{
#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && ((defined(__aarch64__) && HAVE_NEON64) || HAVE_NEON32)
#if HAVE_NEON64 || ((defined(__ARM_NEON__) || defined(__ARM_NEON)) && HAVE_NEON32)

// Unfortunately there is no portable way to check for NEON
// support at runtime from userland in the same way that x86
// has cpuid, so just stick to the compile-time configuration:

#if defined(__aarch64__) && HAVE_NEON64
#if HAVE_NEON64
codec->enc = base64_stream_encode_neon64;
codec->dec = base64_stream_decode_neon64;
#else
Expand Down