diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index cde4e62a..f8496d93 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -62,6 +62,8 @@ jobs: platform: "arm64" - runner: "ubuntu-22.04-arm" platform: "aarch64" + - runner: "windows-11-arm" + platform: "ARM64" steps: - name: Checkout uses: actions/checkout@v4 @@ -141,7 +143,7 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 - - uses: uraimo/run-on-arch-action@v2 + - uses: uraimo/run-on-arch-action@v3 with: arch: ${{matrix.arch}} distro: alpine_latest @@ -162,7 +164,7 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 - - uses: uraimo/run-on-arch-action@v2 + - uses: uraimo/run-on-arch-action@v3 with: arch: ${{matrix.arch}} distro: alpine_latest diff --git a/cmake/test-arch.c b/cmake/test-arch.c index b438405e..a62c1443 100644 --- a/cmake/test-arch.c +++ b/cmake/test-arch.c @@ -12,7 +12,8 @@ //////////////////////////////////////////////////////////////////////////////// // ARM 64-Bit -#if defined(__aarch64__) +#if defined(__aarch64__) \ + || defined(_M_ARM64) #error ##arch=arm64## // ARM 32-Bit diff --git a/lib/arch/neon64/codec.c b/lib/arch/neon64/codec.c index 6b664b40..7ac1a8a7 100644 --- a/lib/arch/neon64/codec.c +++ b/lib/arch/neon64/codec.c @@ -8,13 +8,7 @@ #include "config.h" #include "../../env.h" -#ifdef __aarch64__ -# if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && HAVE_NEON64 -# define BASE64_USE_NEON64 -# endif -#endif - -#ifdef BASE64_USE_NEON64 +#if HAVE_NEON64 #include <arm_neon.h> // Only enable inline assembly on supported compilers. 
@@ -66,7 +60,7 @@ load_64byte_table (const uint8_t *p) # include "enc_loop.c" #endif -#endif // BASE64_USE_NEON64 +#endif // HAVE_NEON64 // Stride size is so large on these NEON 64-bit functions // (48 bytes encode, 64 bytes decode) that we inline the @@ -75,7 +69,7 @@ load_64byte_table (const uint8_t *p) void base64_stream_encode_neon64 BASE64_ENC_PARAMS { -#ifdef BASE64_USE_NEON64 +#if HAVE_NEON64 #include "../generic/enc_head.c" enc_loop_neon64(&s, &slen, &o, &olen); enc_loop_generic_64(&s, &slen, &o, &olen); @@ -88,7 +82,7 @@ base64_stream_encode_neon64 BASE64_ENC_PARAMS int base64_stream_decode_neon64 BASE64_DEC_PARAMS { -#ifdef BASE64_USE_NEON64 +#if HAVE_NEON64 #include "../generic/dec_head.c" dec_loop_neon64(&s, &slen, &o, &olen); dec_loop_generic_32(&s, &slen, &o, &olen); diff --git a/lib/arch/neon64/dec_loop.c b/lib/arch/neon64/dec_loop.c index 48232f20..428e0651 100644 --- a/lib/arch/neon64/dec_loop.c +++ b/lib/arch/neon64/dec_loop.c @@ -100,10 +100,10 @@ dec_loop_neon64 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen) // Check for invalid input, any value larger than 63: const uint8x16_t classified - = vcgtq_u8(str.val[0], vdupq_n_u8(63)) - | vcgtq_u8(str.val[1], vdupq_n_u8(63)) - | vcgtq_u8(str.val[2], vdupq_n_u8(63)) - | vcgtq_u8(str.val[3], vdupq_n_u8(63)); + = vorrq_u8( + vorrq_u8(vcgtq_u8(str.val[0], vdupq_n_u8(63)), vcgtq_u8(str.val[1], vdupq_n_u8(63))), + vorrq_u8(vcgtq_u8(str.val[2], vdupq_n_u8(63)), vcgtq_u8(str.val[3], vdupq_n_u8(63))) + ); // Check that all bits are zero: if (vmaxvq_u8(classified) != 0U) { @@ -111,9 +111,9 @@ dec_loop_neon64 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen) } // Compress four bytes into three: - dec.val[0] = vshlq_n_u8(str.val[0], 2) | vshrq_n_u8(str.val[1], 4); - dec.val[1] = vshlq_n_u8(str.val[1], 4) | vshrq_n_u8(str.val[2], 2); - dec.val[2] = vshlq_n_u8(str.val[2], 6) | str.val[3]; + dec.val[0] = vorrq_u8(vshlq_n_u8(str.val[0], 2), vshrq_n_u8(str.val[1], 4)); + dec.val[1] = 
vorrq_u8(vshlq_n_u8(str.val[1], 4), vshrq_n_u8(str.val[2], 2)); + dec.val[2] = vorrq_u8(vshlq_n_u8(str.val[2], 6), str.val[3]); // Interleave and store decoded result: vst3q_u8((uint8_t *) *o, dec); diff --git a/lib/codec_choose.c b/lib/codec_choose.c index a98b9472..8b5938dd 100644 --- a/lib/codec_choose.c +++ b/lib/codec_choose.c @@ -163,13 +163,13 @@ codec_choose_forced (struct codec *codec, int flags) static bool codec_choose_arm (struct codec *codec) { -#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && ((defined(__aarch64__) && HAVE_NEON64) || HAVE_NEON32) +#if HAVE_NEON64 || ((defined(__ARM_NEON__) || defined(__ARM_NEON)) && HAVE_NEON32) // Unfortunately there is no portable way to check for NEON // support at runtime from userland in the same way that x86 // has cpuid, so just stick to the compile-time configuration: - #if defined(__aarch64__) && HAVE_NEON64 + #if HAVE_NEON64 codec->enc = base64_stream_encode_neon64; codec->dec = base64_stream_decode_neon64; #else