From 92d47041cceabc2e96911fe3a1be52964e2f7926 Mon Sep 17 00:00:00 2001 From: jake massimo Date: Thu, 21 May 2026 22:02:28 +0000 Subject: [PATCH] x86: Add HOL Light proof and CBMC contracts for poly_caddq Adds formal verification for the x86_64 AVX2 poly_caddq implementation: - HOL Light proof of functional correctness (modular reduction) - Per-coefficient bounds folded directly into the core spec, matching the CBMC contract `ensures(array_bound(r, 0, MLDSA_N, 0, MLDSA_Q))` - Constant-time and memory safety proofs - CBMC contracts for the native x86_64 implementation Signed-off-by: Jake Massimo --- .github/workflows/hol_light.yml | 2 + BIBLIOGRAPHY.md | 1 + dev/x86_64/src/arith_native_x86_64.h | 10 +- dev/x86_64/src/poly_caddq_avx2_asm.S | 39 +- .../native/x86_64/src/arith_native_x86_64.h | 10 +- .../native/x86_64/src/poly_caddq_avx2_asm.S | 122 ++++- nix/s2n_bignum/default.nix | 4 +- proofs/cbmc/poly_caddq_native_x86_64/Makefile | 58 +++ .../poly_caddq_native_x86_64_harness.c | 15 + proofs/hol_light/README.md | 1 + proofs/hol_light/common/mldsa_specs.ml | 38 ++ proofs/hol_light/x86_64/Makefile | 1 + .../x86_64/mldsa/poly_caddq_avx2_asm.S | 187 +++++++ .../hol_light/x86_64/proofs/dump_bytecode.ml | 4 + proofs/hol_light/x86_64/proofs/mldsa_utils.ml | 2 +- .../x86_64/proofs/poly_caddq_avx2_asm.ml | 472 ++++++++++++++++++ .../x86_64/proofs/subroutine_signatures.ml | 15 + scripts/autogen | 6 + 18 files changed, 965 insertions(+), 22 deletions(-) create mode 100644 proofs/cbmc/poly_caddq_native_x86_64/Makefile create mode 100644 proofs/cbmc/poly_caddq_native_x86_64/poly_caddq_native_x86_64_harness.c create mode 100644 proofs/hol_light/x86_64/mldsa/poly_caddq_avx2_asm.S create mode 100644 proofs/hol_light/x86_64/proofs/poly_caddq_avx2_asm.ml diff --git a/.github/workflows/hol_light.yml b/.github/workflows/hol_light.yml index 50f9310bb..6c0ed93c1 100644 --- a/.github/workflows/hol_light.yml +++ b/.github/workflows/hol_light.yml @@ -203,6 +203,8 @@ jobs: matrix: proof: # Dependencies on {name}.{S,ml} are implicit + - name: poly_caddq_avx2_asm + needs: ["mldsa_specs.ml", "mldsa_utils.ml", "subroutine_signatures.ml"] - name: ntt_avx2_asm needs: ["mldsa_specs.ml", "mldsa_utils.ml", "mldsa_zetas.ml", "subroutine_signatures.ml"] - name: intt_avx2_asm diff --git a/BIBLIOGRAPHY.md b/BIBLIOGRAPHY.md index 3ff8243d5..eb3a35808 100644 --- a/BIBLIOGRAPHY.md +++ b/BIBLIOGRAPHY.md @@ -263,6 +263,7 @@ source code and documentation. - [proofs/hol_light/x86_64/mldsa/pointwise_acc_l5_avx2_asm.S](proofs/hol_light/x86_64/mldsa/pointwise_acc_l5_avx2_asm.S) - [proofs/hol_light/x86_64/mldsa/pointwise_acc_l7_avx2_asm.S](proofs/hol_light/x86_64/mldsa/pointwise_acc_l7_avx2_asm.S) - [proofs/hol_light/x86_64/mldsa/pointwise_avx2_asm.S](proofs/hol_light/x86_64/mldsa/pointwise_avx2_asm.S) + - [proofs/hol_light/x86_64/mldsa/poly_caddq_avx2_asm.S](proofs/hol_light/x86_64/mldsa/poly_caddq_avx2_asm.S) ### `Round3_Spec` diff --git a/dev/x86_64/src/arith_native_x86_64.h b/dev/x86_64/src/arith_native_x86_64.h index 7c2975e5e..3173d0bf4 100644 --- a/dev/x86_64/src/arith_native_x86_64.h +++ b/dev/x86_64/src/arith_native_x86_64.h @@ -99,7 +99,15 @@ void mld_poly_decompose_88_avx2(int32_t *a1, int32_t *a0); #endif /* !MLD_CONFIG_NO_SIGN_API */ #define mld_poly_caddq_avx2_asm MLD_NAMESPACE(poly_caddq_avx2_asm) -void mld_poly_caddq_avx2_asm(int32_t *r); +void mld_poly_caddq_avx2_asm(int32_t *r) +/* This must be kept in sync with the HOL-Light specification + * in proofs/hol_light/x86_64/proofs/poly_caddq_avx2_asm.ml */ +__contract__( + requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N)) + requires(array_abs_bound(r, 0, MLDSA_N, MLDSA_Q)) + assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N)) + ensures(array_bound(r, 0, MLDSA_N, 0, MLDSA_Q)) +); #if !defined(MLD_CONFIG_NO_VERIFY_API) #define mld_poly_use_hint_32_avx2 MLD_NAMESPACE(mld_poly_use_hint_32_avx2) diff --git a/dev/x86_64/src/poly_caddq_avx2_asm.S b/dev/x86_64/src/poly_caddq_avx2_asm.S index 338259909..cc61d792c 100644 --- a/dev/x86_64/src/poly_caddq_avx2_asm.S +++ b/dev/x86_64/src/poly_caddq_avx2_asm.S @@ -46,22 +46,43 @@ vmovdqa \reg, \offset(%rdi) .balign 16 MLD_ASM_FN_SYMBOL(poly_caddq_avx2_asm) -mov $8380417, %edx -leaq 1024(%rdi), %rax vpxor %xmm2, %xmm2, %xmm2 -vmovd %edx, %xmm1 +mov $8380417, %eax +vmovd %eax, %xmm1 vpbroadcastd %xmm1, %ymm1 -poly_caddq_avx2_loop: - caddq_vector 0, %ymm0 caddq_vector 32, %ymm3 caddq_vector 64, %ymm4 caddq_vector 96, %ymm5 - -addq $128, %rdi /* advance by 128 bytes (4 vectors) */ -cmpq %rdi, %rax -jne poly_caddq_avx2_loop /* 8 iterations (32/4 = 8) */ +caddq_vector 128, %ymm0 +caddq_vector 160, %ymm3 +caddq_vector 192, %ymm4 +caddq_vector 224, %ymm5 +caddq_vector 256, %ymm0 +caddq_vector 288, %ymm3 +caddq_vector 320, %ymm4 +caddq_vector 352, %ymm5 +caddq_vector 384, %ymm0 +caddq_vector 416, %ymm3 +caddq_vector 448, %ymm4 +caddq_vector 480, %ymm5 +caddq_vector 512, %ymm0 +caddq_vector 544, %ymm3 +caddq_vector 576, %ymm4 +caddq_vector 608, %ymm5 +caddq_vector 640, %ymm0 +caddq_vector 672, %ymm3 +caddq_vector 704, %ymm4 +caddq_vector 736, %ymm5 +caddq_vector 768, %ymm0 +caddq_vector 800, %ymm3 +caddq_vector 832, %ymm4 +caddq_vector 864, %ymm5 +caddq_vector 896, %ymm0 +caddq_vector 928, %ymm3 +caddq_vector 960, %ymm4 +caddq_vector 992, %ymm5 ret /* simpasm: footer-start */ diff --git a/mldsa/src/native/x86_64/src/arith_native_x86_64.h b/mldsa/src/native/x86_64/src/arith_native_x86_64.h index 7c2975e5e..3173d0bf4 100644 --- a/mldsa/src/native/x86_64/src/arith_native_x86_64.h +++ b/mldsa/src/native/x86_64/src/arith_native_x86_64.h @@ -99,7 +99,15 @@ void mld_poly_decompose_88_avx2(int32_t *a1, int32_t *a0); #endif /* !MLD_CONFIG_NO_SIGN_API */ #define mld_poly_caddq_avx2_asm MLD_NAMESPACE(poly_caddq_avx2_asm) -void mld_poly_caddq_avx2_asm(int32_t *r); +void mld_poly_caddq_avx2_asm(int32_t *r) +/* This must be kept in sync with the HOL-Light specification + * in proofs/hol_light/x86_64/proofs/poly_caddq_avx2_asm.ml */ +__contract__( + requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N)) + requires(array_abs_bound(r, 0, MLDSA_N, MLDSA_Q)) + assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N)) + ensures(array_bound(r, 0, MLDSA_N, 0, MLDSA_Q)) +); #if !defined(MLD_CONFIG_NO_VERIFY_API) #define mld_poly_use_hint_32_avx2 MLD_NAMESPACE(mld_poly_use_hint_32_avx2) diff --git a/mldsa/src/native/x86_64/src/poly_caddq_avx2_asm.S b/mldsa/src/native/x86_64/src/poly_caddq_avx2_asm.S index cd4815fe3..3b9c8d953 100644 --- a/mldsa/src/native/x86_64/src/poly_caddq_avx2_asm.S +++ b/mldsa/src/native/x86_64/src/poly_caddq_avx2_asm.S @@ -44,13 +44,10 @@ MLD_ASM_FN_SYMBOL(poly_caddq_avx2_asm) .cfi_startproc - movl $0x7fe001, %edx # imm = 0x7FE001 - leaq 0x400(%rdi), %rax vpxor %xmm2, %xmm2, %xmm2 - vmovd %edx, %xmm1 + movl $0x7fe001, %eax # imm = 0x7FE001 + vmovd %eax, %xmm1 vpbroadcastd %xmm1, %ymm1 - -Lpoly_caddq_avx2_loop: vpcmpgtd (%rdi), %ymm2, %ymm0 vpand %ymm1, %ymm0, %ymm0 vpaddd (%rdi), %ymm0, %ymm0 @@ -67,9 +64,118 @@ Lpoly_caddq_avx2_loop: vpand %ymm1, %ymm5, %ymm5 vpaddd 0x60(%rdi), %ymm5, %ymm5 vmovdqa %ymm5, 0x60(%rdi) - addq $0x80, %rdi - cmpq %rdi, %rax - jne Lpoly_caddq_avx2_loop + vpcmpgtd 0x80(%rdi), %ymm2, %ymm0 + vpand %ymm1, %ymm0, %ymm0 + vpaddd 0x80(%rdi), %ymm0, %ymm0 + vmovdqa %ymm0, 0x80(%rdi) + vpcmpgtd 0xa0(%rdi), %ymm2, %ymm3 + vpand %ymm1, %ymm3, %ymm3 + vpaddd 0xa0(%rdi), %ymm3, %ymm3 + vmovdqa %ymm3, 0xa0(%rdi) + vpcmpgtd 0xc0(%rdi), %ymm2, %ymm4 + vpand %ymm1, %ymm4, %ymm4 + vpaddd 0xc0(%rdi), %ymm4, %ymm4 + vmovdqa %ymm4, 0xc0(%rdi) + vpcmpgtd 0xe0(%rdi), %ymm2, %ymm5 + vpand %ymm1, %ymm5, %ymm5 + vpaddd 0xe0(%rdi), %ymm5, %ymm5 + vmovdqa %ymm5, 0xe0(%rdi) + vpcmpgtd 0x100(%rdi), %ymm2, %ymm0 + vpand %ymm1, %ymm0, %ymm0 + vpaddd 0x100(%rdi), %ymm0, %ymm0 + vmovdqa %ymm0, 0x100(%rdi) + vpcmpgtd 0x120(%rdi), %ymm2, %ymm3 + vpand %ymm1, %ymm3, %ymm3 + vpaddd 0x120(%rdi), %ymm3, %ymm3 + vmovdqa %ymm3, 0x120(%rdi) + vpcmpgtd 0x140(%rdi), %ymm2, %ymm4 + vpand %ymm1, %ymm4, %ymm4 + vpaddd 0x140(%rdi), %ymm4, %ymm4 + vmovdqa %ymm4, 0x140(%rdi) + vpcmpgtd 0x160(%rdi), %ymm2, %ymm5 + vpand %ymm1, %ymm5, %ymm5 + vpaddd 0x160(%rdi), %ymm5, %ymm5 + vmovdqa %ymm5, 0x160(%rdi) + vpcmpgtd 0x180(%rdi), %ymm2, %ymm0 + vpand %ymm1, %ymm0, %ymm0 + vpaddd 0x180(%rdi), %ymm0, %ymm0 + vmovdqa %ymm0, 0x180(%rdi) + vpcmpgtd 0x1a0(%rdi), %ymm2, %ymm3 + vpand %ymm1, %ymm3, %ymm3 + vpaddd 0x1a0(%rdi), %ymm3, %ymm3 + vmovdqa %ymm3, 0x1a0(%rdi) + vpcmpgtd 0x1c0(%rdi), %ymm2, %ymm4 + vpand %ymm1, %ymm4, %ymm4 + vpaddd 0x1c0(%rdi), %ymm4, %ymm4 + vmovdqa %ymm4, 0x1c0(%rdi) + vpcmpgtd 0x1e0(%rdi), %ymm2, %ymm5 + vpand %ymm1, %ymm5, %ymm5 + vpaddd 0x1e0(%rdi), %ymm5, %ymm5 + vmovdqa %ymm5, 0x1e0(%rdi) + vpcmpgtd 0x200(%rdi), %ymm2, %ymm0 + vpand %ymm1, %ymm0, %ymm0 + vpaddd 0x200(%rdi), %ymm0, %ymm0 + vmovdqa %ymm0, 0x200(%rdi) + vpcmpgtd 0x220(%rdi), %ymm2, %ymm3 + vpand %ymm1, %ymm3, %ymm3 + vpaddd 0x220(%rdi), %ymm3, %ymm3 + vmovdqa %ymm3, 0x220(%rdi) + vpcmpgtd 0x240(%rdi), %ymm2, %ymm4 + vpand %ymm1, %ymm4, %ymm4 + vpaddd 0x240(%rdi), %ymm4, %ymm4 + vmovdqa %ymm4, 0x240(%rdi) + vpcmpgtd 0x260(%rdi), %ymm2, %ymm5 + vpand %ymm1, %ymm5, %ymm5 + vpaddd 0x260(%rdi), %ymm5, %ymm5 + vmovdqa %ymm5, 0x260(%rdi) + vpcmpgtd 0x280(%rdi), %ymm2, %ymm0 + vpand %ymm1, %ymm0, %ymm0 + vpaddd 0x280(%rdi), %ymm0, %ymm0 + vmovdqa %ymm0, 0x280(%rdi) + vpcmpgtd 0x2a0(%rdi), %ymm2, %ymm3 + vpand %ymm1, %ymm3, %ymm3 + vpaddd 0x2a0(%rdi), %ymm3, %ymm3 + vmovdqa %ymm3, 0x2a0(%rdi) + vpcmpgtd 0x2c0(%rdi), %ymm2, %ymm4 + vpand %ymm1, %ymm4, %ymm4 + vpaddd 0x2c0(%rdi), %ymm4, %ymm4 + vmovdqa %ymm4, 0x2c0(%rdi) + vpcmpgtd 0x2e0(%rdi), %ymm2, %ymm5 + vpand %ymm1, %ymm5, %ymm5 + vpaddd 0x2e0(%rdi), %ymm5, %ymm5 + vmovdqa %ymm5, 0x2e0(%rdi) + vpcmpgtd 0x300(%rdi), %ymm2, %ymm0 + vpand %ymm1, %ymm0, %ymm0 + vpaddd 0x300(%rdi), %ymm0, %ymm0 + vmovdqa %ymm0, 0x300(%rdi) + vpcmpgtd 0x320(%rdi), %ymm2, %ymm3 + vpand %ymm1, %ymm3, %ymm3 + vpaddd 0x320(%rdi), %ymm3, %ymm3 + vmovdqa %ymm3, 0x320(%rdi) + vpcmpgtd 0x340(%rdi), %ymm2, %ymm4 + vpand %ymm1, %ymm4, %ymm4 + vpaddd 0x340(%rdi), %ymm4, %ymm4 + vmovdqa %ymm4, 0x340(%rdi) + vpcmpgtd 0x360(%rdi), %ymm2, %ymm5 + vpand %ymm1, %ymm5, %ymm5 + vpaddd 0x360(%rdi), %ymm5, %ymm5 + vmovdqa %ymm5, 0x360(%rdi) + vpcmpgtd 0x380(%rdi), %ymm2, %ymm0 + vpand %ymm1, %ymm0, %ymm0 + vpaddd 0x380(%rdi), %ymm0, %ymm0 + vmovdqa %ymm0, 0x380(%rdi) + vpcmpgtd 0x3a0(%rdi), %ymm2, %ymm3 + vpand %ymm1, %ymm3, %ymm3 + vpaddd 0x3a0(%rdi), %ymm3, %ymm3 + vmovdqa %ymm3, 0x3a0(%rdi) + vpcmpgtd 0x3c0(%rdi), %ymm2, %ymm4 + vpand %ymm1, %ymm4, %ymm4 + vpaddd 0x3c0(%rdi), %ymm4, %ymm4 + vmovdqa %ymm4, 0x3c0(%rdi) + vpcmpgtd 0x3e0(%rdi), %ymm2, %ymm5 + vpand %ymm1, %ymm5, %ymm5 + vpaddd 0x3e0(%rdi), %ymm5, %ymm5 + vmovdqa %ymm5, 0x3e0(%rdi) retq .cfi_endproc diff --git a/nix/s2n_bignum/default.nix b/nix/s2n_bignum/default.nix index 454479b65..b52229818 100644 --- a/nix/s2n_bignum/default.nix +++ b/nix/s2n_bignum/default.nix @@ -4,12 +4,12 @@ { stdenv, fetchFromGitHub, writeText, ... }: stdenv.mkDerivation rec { pname = "s2n_bignum"; - version = "f3c5acff6948d559194245237f6aaa7ebf7fcae8"; + version = "ccef24569ed5c41f4e7fcb19965bf48eff3fdaa0"; src = fetchFromGitHub { owner = "awslabs"; repo = "s2n-bignum"; rev = "${version}"; - hash = "sha256-kfc8X2e+voefttshSUdifDc3Qn+dx0Gq5ENNLhWIdw0="; + hash = "sha256-1KHAmHtBKMO+8Ea+1TTF6adKW3XKRmfcJC1vNZ/guRA="; }; setupHook = writeText "setup-hook.sh" '' export S2N_BIGNUM_DIR="$1" diff --git a/proofs/cbmc/poly_caddq_native_x86_64/Makefile b/proofs/cbmc/poly_caddq_native_x86_64/Makefile new file mode 100644 index 000000000..983d73374 --- /dev/null +++ b/proofs/cbmc/poly_caddq_native_x86_64/Makefile @@ -0,0 +1,58 @@ +# Copyright (c) The mldsa-native project authors +# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + +include ../Makefile_params.common + +HARNESS_ENTRY = harness +HARNESS_FILE = poly_caddq_native_x86_64_harness + +# This should be a unique identifier for this proof, and will appear on the +# Litani dashboard. It can be human-readable and contain spaces if you wish. +PROOF_UID = poly_caddq_native_x86_64 + +# We need to set MLD_CHECK_APIS as otherwise mldsa/src/native/api.h won't be +# included, which contains the CBMC specifications. +DEFINES += -DMLD_CONFIG_USE_NATIVE_BACKEND_ARITH -DMLD_CONFIG_ARITH_BACKEND_FILE="\"$(SRCDIR)/mldsa/src/native/x86_64/meta.h\"" -DMLD_CHECK_APIS +INCLUDES += + +REMOVE_FUNCTION_BODY += +UNWINDSET += + +PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c +PROJECT_SOURCES += $(SRCDIR)/mldsa/src/poly.c + +CHECK_FUNCTION_CONTRACTS=mld_poly_caddq_native +USE_FUNCTION_CONTRACTS = mld_poly_caddq_avx2_asm +USE_FUNCTION_CONTRACTS += mld_sys_check_capability +APPLY_LOOP_CONTRACTS=on +USE_DYNAMIC_FRAMES=1 + +# Disable any setting of EXTERNAL_SAT_SOLVER, and choose SMT backend instead +EXTERNAL_SAT_SOLVER= +CBMCFLAGS=--smt2 + +FUNCTION_NAME = poly_caddq_native_x86_64 + +# If this proof is found to consume huge amounts of RAM, you can set the +# EXPENSIVE variable. With new enough versions of the proof tools, this will +# restrict the number of EXPENSIVE CBMC jobs running at once. See the +# documentation in Makefile.common under the "Job Pools" heading for details. +# EXPENSIVE = true + +# This function is large enough to need... +CBMC_OBJECT_BITS = 8 + +# If you require access to a file-local ("static") function or object to conduct +# your proof, set the following (and do not include the original source file +# ("mldsa/poly.c") in PROJECT_SOURCES). +# REWRITTEN_SOURCES = $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i +# include ../Makefile.common +# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_SOURCE = $(SRCDIR)/mldsa/src/poly.c +# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_FUNCTIONS = foo bar +# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_OBJECTS = baz +# Care is required with variables on the left-hand side: REWRITTEN_SOURCES must +# be set before including Makefile.common, but any use of variables on the +# left-hand side requires those variables to be defined. Hence, _SOURCE, +# _FUNCTIONS, _OBJECTS is set after including Makefile.common. + +include ../Makefile.common diff --git a/proofs/cbmc/poly_caddq_native_x86_64/poly_caddq_native_x86_64_harness.c b/proofs/cbmc/poly_caddq_native_x86_64/poly_caddq_native_x86_64_harness.c new file mode 100644 index 000000000..b2ef4d855 --- /dev/null +++ b/proofs/cbmc/poly_caddq_native_x86_64/poly_caddq_native_x86_64_harness.c @@ -0,0 +1,15 @@ +// Copyright (c) The mldsa-native project authors +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + +#include +#include "cbmc.h" +#include "params.h" + +int mld_poly_caddq_native(int32_t a[MLDSA_N]); + +void harness(void) +{ + int32_t *a; + int t; + t = mld_poly_caddq_native(a); +} diff --git a/proofs/hol_light/README.md b/proofs/hol_light/README.md index afa2dd5ba..85eae6626 100644 --- a/proofs/hol_light/README.md +++ b/proofs/hol_light/README.md @@ -139,6 +139,7 @@ echo '1+1;;' | nc -w 5 127.0.0.1 2012 - ML-DSA Arithmetic: * x86_64 forward NTT: [ntt_avx2_asm.S](x86_64/mldsa/ntt_avx2_asm.S) * x86_64 inverse NTT: [intt_avx2_asm.S](x86_64/mldsa/intt_avx2_asm.S) + * x86_64 poly_caddq: [poly_caddq_avx2_asm.S](x86_64/mldsa/poly_caddq_avx2_asm.S) * x86_64 NTT unpack: [nttunpack_avx2_asm.S](x86_64/mldsa/nttunpack_avx2_asm.S) * x86_64 pointwise multiplication: [pointwise_avx2_asm.S](x86_64/mldsa/pointwise_avx2_asm.S) * x86_64 pointwise multiplication-accumulation (l=4): [pointwise_acc_l4_avx2_asm.S](x86_64/mldsa/pointwise_acc_l4_avx2_asm.S) diff --git a/proofs/hol_light/common/mldsa_specs.ml b/proofs/hol_light/common/mldsa_specs.ml index c6f627f4b..ef88cef26 100644 --- a/proofs/hol_light/common/mldsa_specs.ml +++ b/proofs/hol_light/common/mldsa_specs.ml @@ -1732,6 +1732,44 @@ let MLDSA_USE_HINT_88_UNFOLD = prove( SPEC_TAC(`mldsa_decompose_88 r`, `p:num#int`) THEN REWRITE_TAC[FORALL_PAIR_THM] THEN CONV_TAC(TOP_DEPTH_CONV let_CONV) THEN REWRITE_TAC[]);; + +(* ========================================================================= *) +(* caddq: conditional addition of q for signed-to-unsigned reduction *) +(* *) +(* Maps a coefficient in (-q, q) to [0, q) by adding q if negative. *) +(* ========================================================================= *) + +let mldsa_caddq_spec = new_definition + `mldsa_caddq_spec (x:int) : int = + if x < &0 then x + &8380417 else x`;; + +let MLDSA_CADDQ_SPEC_REM = prove + (`!x. abs x < &8380417 ==> mldsa_caddq_spec x = x rem &8380417`, + GEN_TAC THEN REWRITE_TAC[mldsa_caddq_spec] THEN DISCH_TAC THEN + COND_CASES_TAC THENL + [ONCE_REWRITE_TAC[EQ_SYM_EQ] THEN + REWRITE_TAC[INT_REM_UNIQUE] THEN + CONV_TAC INT_REDUCE_CONV THEN + CONJ_TAC THENL [ASM_INT_ARITH_TAC; CONV_TAC INTEGER_RULE]; + MATCH_MP_TAC(GSYM INT_REM_LT) THEN ASM_INT_ARITH_TAC]);; + +let MLDSA_CADDQ_SPEC_BOUNDS = prove + (`!x. abs x < &8380417 + ==> &0 <= mldsa_caddq_spec x /\ mldsa_caddq_spec x < &8380417`, + GEN_TAC THEN DISCH_TAC THEN + MP_TAC(SPEC `x:int` MLDSA_CADDQ_SPEC_REM) THEN ASM_REWRITE_TAC[] THEN + DISCH_THEN SUBST1_TAC THEN CONJ_TAC THENL + [MP_TAC(SPECL [`x:int`; `&8380417:int`] INT_REM_POS) THEN INT_ARITH_TAC; + MP_TAC(SPECL [`x:int`; `&8380417:int`] INT_LT_REM) THEN INT_ARITH_TAC]);; + +let MLDSA_CADDQ_SPEC_LOWER = prove + (`!x. abs x < &8380417 ==> &0 <= mldsa_caddq_spec x`, + MESON_TAC[MLDSA_CADDQ_SPEC_BOUNDS]);; + +let MLDSA_CADDQ_SPEC_UPPER = prove + (`!x. abs x < &8380417 ==> mldsa_caddq_spec x < &8380417`, + MESON_TAC[MLDSA_CADDQ_SPEC_BOUNDS]);; + (* ========================================================================= *) (* decompose: bound lemmas for FST/SND of mldsa_decompose_{32,88} *) (* *) diff --git a/proofs/hol_light/x86_64/Makefile b/proofs/hol_light/x86_64/Makefile index d8efdc8ac..ee9506698 100644 --- a/proofs/hol_light/x86_64/Makefile +++ b/proofs/hol_light/x86_64/Makefile @@ -53,6 +53,7 @@ SPLIT=tr ';' '\n' OBJ = mldsa/ntt_avx2_asm.o \ mldsa/intt_avx2_asm.o \ mldsa/nttunpack_avx2_asm.o \ + mldsa/poly_caddq_avx2_asm.o \ mldsa/pointwise_avx2_asm.o \ mldsa/pointwise_acc_l4_avx2_asm.o \ mldsa/pointwise_acc_l5_avx2_asm.o \ diff --git a/proofs/hol_light/x86_64/mldsa/poly_caddq_avx2_asm.S b/proofs/hol_light/x86_64/mldsa/poly_caddq_avx2_asm.S new file mode 100644 index 000000000..f14fb725a --- /dev/null +++ b/proofs/hol_light/x86_64/mldsa/poly_caddq_avx2_asm.S @@ -0,0 +1,187 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* References + * ========== + * + * - [REF_AVX2] + * CRYSTALS-Dilithium optimized AVX2 implementation + * Bai, Ducas, Kiltz, Lepoint, Lyubashevsky, Schwabe, Seiler, Stehlé + * https://github.com/pq-crystals/dilithium/tree/master/avx2 + */ + +/* + * This file is derived from the public domain + * AVX2 Dilithium implementation @[REF_AVX2]. + */ + + +/************************************************* + * Name: mld_poly_caddq_avx2_asm + * + * Description: For all coefficients of in/out polynomial add Q if + * coefficient is negative. + * + * Arguments: - int32_t *r: pointer to input/output polynomial + **************************************************/ + + + + +/* + * WARNING: This file is auto-derived from the mldsa-native source file + * dev/x86_64/src/poly_caddq_avx2_asm.S using scripts/simpasm. Do not modify it directly. + */ + +.text +.balign 4 +#ifdef __APPLE__ +.global _PQCP_MLDSA_NATIVE_MLDSA44_poly_caddq_avx2_asm +_PQCP_MLDSA_NATIVE_MLDSA44_poly_caddq_avx2_asm: +#else +.global PQCP_MLDSA_NATIVE_MLDSA44_poly_caddq_avx2_asm +PQCP_MLDSA_NATIVE_MLDSA44_poly_caddq_avx2_asm: +#endif + + .cfi_startproc + endbr64 + vpxor %xmm2, %xmm2, %xmm2 + movl $0x7fe001, %eax # imm = 0x7FE001 + vmovd %eax, %xmm1 + vpbroadcastd %xmm1, %ymm1 + vpcmpgtd (%rdi), %ymm2, %ymm0 + vpand %ymm1, %ymm0, %ymm0 + vpaddd (%rdi), %ymm0, %ymm0 + vmovdqa %ymm0, (%rdi) + vpcmpgtd 0x20(%rdi), %ymm2, %ymm3 + vpand %ymm1, %ymm3, %ymm3 + vpaddd 0x20(%rdi), %ymm3, %ymm3 + vmovdqa %ymm3, 0x20(%rdi) + vpcmpgtd 0x40(%rdi), %ymm2, %ymm4 + vpand %ymm1, %ymm4, %ymm4 + vpaddd 0x40(%rdi), %ymm4, %ymm4 + vmovdqa %ymm4, 0x40(%rdi) + vpcmpgtd 0x60(%rdi), %ymm2, %ymm5 + vpand %ymm1, %ymm5, %ymm5 + vpaddd 0x60(%rdi), %ymm5, %ymm5 + vmovdqa %ymm5, 0x60(%rdi) + vpcmpgtd 0x80(%rdi), %ymm2, %ymm0 + vpand %ymm1, %ymm0, %ymm0 + vpaddd 0x80(%rdi), %ymm0, %ymm0 + vmovdqa %ymm0, 0x80(%rdi) + vpcmpgtd 0xa0(%rdi), %ymm2, %ymm3 + vpand %ymm1, %ymm3, %ymm3 + vpaddd 0xa0(%rdi), %ymm3, %ymm3 + vmovdqa %ymm3, 0xa0(%rdi) + vpcmpgtd 0xc0(%rdi), %ymm2, %ymm4 + vpand %ymm1, %ymm4, %ymm4 + vpaddd 0xc0(%rdi), %ymm4, %ymm4 + vmovdqa %ymm4, 0xc0(%rdi) + vpcmpgtd 0xe0(%rdi), %ymm2, %ymm5 + vpand %ymm1, %ymm5, %ymm5 + vpaddd 0xe0(%rdi), %ymm5, %ymm5 + vmovdqa %ymm5, 0xe0(%rdi) + vpcmpgtd 0x100(%rdi), %ymm2, %ymm0 + vpand %ymm1, %ymm0, %ymm0 + vpaddd 0x100(%rdi), %ymm0, %ymm0 + vmovdqa %ymm0, 0x100(%rdi) + vpcmpgtd 0x120(%rdi), %ymm2, %ymm3 + vpand %ymm1, %ymm3, %ymm3 + vpaddd 0x120(%rdi), %ymm3, %ymm3 + vmovdqa %ymm3, 0x120(%rdi) + vpcmpgtd 0x140(%rdi), %ymm2, %ymm4 + vpand %ymm1, %ymm4, %ymm4 + vpaddd 0x140(%rdi), %ymm4, %ymm4 + vmovdqa %ymm4, 0x140(%rdi) + vpcmpgtd 0x160(%rdi), %ymm2, %ymm5 + vpand %ymm1, %ymm5, %ymm5 + vpaddd 0x160(%rdi), %ymm5, %ymm5 + vmovdqa %ymm5, 0x160(%rdi) + vpcmpgtd 0x180(%rdi), %ymm2, %ymm0 + vpand %ymm1, %ymm0, %ymm0 + vpaddd 0x180(%rdi), %ymm0, %ymm0 + vmovdqa %ymm0, 0x180(%rdi) + vpcmpgtd 0x1a0(%rdi), %ymm2, %ymm3 + vpand %ymm1, %ymm3, %ymm3 + vpaddd 0x1a0(%rdi), %ymm3, %ymm3 + vmovdqa %ymm3, 0x1a0(%rdi) + vpcmpgtd 0x1c0(%rdi), %ymm2, %ymm4 + vpand %ymm1, %ymm4, %ymm4 + vpaddd 0x1c0(%rdi), %ymm4, %ymm4 + vmovdqa %ymm4, 0x1c0(%rdi) + vpcmpgtd 0x1e0(%rdi), %ymm2, %ymm5 + vpand %ymm1, %ymm5, %ymm5 + vpaddd 0x1e0(%rdi), %ymm5, %ymm5 + vmovdqa %ymm5, 0x1e0(%rdi) + vpcmpgtd 0x200(%rdi), %ymm2, %ymm0 + vpand %ymm1, %ymm0, %ymm0 + vpaddd 0x200(%rdi), %ymm0, %ymm0 + vmovdqa %ymm0, 0x200(%rdi) + vpcmpgtd 0x220(%rdi), %ymm2, %ymm3 + vpand %ymm1, %ymm3, %ymm3 + vpaddd 0x220(%rdi), %ymm3, %ymm3 + vmovdqa %ymm3, 0x220(%rdi) + vpcmpgtd 0x240(%rdi), %ymm2, %ymm4 + vpand %ymm1, %ymm4, %ymm4 + vpaddd 0x240(%rdi), %ymm4, %ymm4 + vmovdqa %ymm4, 0x240(%rdi) + vpcmpgtd 0x260(%rdi), %ymm2, %ymm5 + vpand %ymm1, %ymm5, %ymm5 + vpaddd 0x260(%rdi), %ymm5, %ymm5 + vmovdqa %ymm5, 0x260(%rdi) + vpcmpgtd 0x280(%rdi), %ymm2, %ymm0 + vpand %ymm1, %ymm0, %ymm0 + vpaddd 0x280(%rdi), %ymm0, %ymm0 + vmovdqa %ymm0, 0x280(%rdi) + vpcmpgtd 0x2a0(%rdi), %ymm2, %ymm3 + vpand %ymm1, %ymm3, %ymm3 + vpaddd 0x2a0(%rdi), %ymm3, %ymm3 + vmovdqa %ymm3, 0x2a0(%rdi) + vpcmpgtd 0x2c0(%rdi), %ymm2, %ymm4 + vpand %ymm1, %ymm4, %ymm4 + vpaddd 0x2c0(%rdi), %ymm4, %ymm4 + vmovdqa %ymm4, 0x2c0(%rdi) + vpcmpgtd 0x2e0(%rdi), %ymm2, %ymm5 + vpand %ymm1, %ymm5, %ymm5 + vpaddd 0x2e0(%rdi), %ymm5, %ymm5 + vmovdqa %ymm5, 0x2e0(%rdi) + vpcmpgtd 0x300(%rdi), %ymm2, %ymm0 + vpand %ymm1, %ymm0, %ymm0 + vpaddd 0x300(%rdi), %ymm0, %ymm0 + vmovdqa %ymm0, 0x300(%rdi) + vpcmpgtd 0x320(%rdi), %ymm2, %ymm3 + vpand %ymm1, %ymm3, %ymm3 + vpaddd 0x320(%rdi), %ymm3, %ymm3 + vmovdqa %ymm3, 0x320(%rdi) + vpcmpgtd 0x340(%rdi), %ymm2, %ymm4 + vpand %ymm1, %ymm4, %ymm4 + vpaddd 0x340(%rdi), %ymm4, %ymm4 + vmovdqa %ymm4, 0x340(%rdi) + vpcmpgtd 0x360(%rdi), %ymm2, %ymm5 + vpand %ymm1, %ymm5, %ymm5 + vpaddd 0x360(%rdi), %ymm5, %ymm5 + vmovdqa %ymm5, 0x360(%rdi) + vpcmpgtd 0x380(%rdi), %ymm2, %ymm0 + vpand %ymm1, %ymm0, %ymm0 + vpaddd 0x380(%rdi), %ymm0, %ymm0 + vmovdqa %ymm0, 0x380(%rdi) + vpcmpgtd 0x3a0(%rdi), %ymm2, %ymm3 + vpand %ymm1, %ymm3, %ymm3 + vpaddd 0x3a0(%rdi), %ymm3, %ymm3 + vmovdqa %ymm3, 0x3a0(%rdi) + vpcmpgtd 0x3c0(%rdi), %ymm2, %ymm4 + vpand %ymm1, %ymm4, %ymm4 + vpaddd 0x3c0(%rdi), %ymm4, %ymm4 + vmovdqa %ymm4, 0x3c0(%rdi) + vpcmpgtd 0x3e0(%rdi), %ymm2, %ymm5 + vpand %ymm1, %ymm5, %ymm5 + vpaddd 0x3e0(%rdi), %ymm5, %ymm5 + vmovdqa %ymm5, 0x3e0(%rdi) + retq + .cfi_endproc + +#if defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/proofs/hol_light/x86_64/proofs/dump_bytecode.ml b/proofs/hol_light/x86_64/proofs/dump_bytecode.ml index 5b3b92936..27d1c3adb 100644 --- a/proofs/hol_light/x86_64/proofs/dump_bytecode.ml +++ b/proofs/hol_light/x86_64/proofs/dump_bytecode.ml @@ -36,3 +36,7 @@ print_string "==== bytecode end =====================================\n\n";; print_string "=== bytecode start: x86_64/mldsa/keccak_f1600_x4_avx2_asm.o ================\n";; print_literal_from_elf "x86_64/mldsa/keccak_f1600_x4_avx2_asm.o";; print_string "==== bytecode end =====================================\n\n";; + +print_string "=== bytecode start: x86_64/mldsa/poly_caddq_avx2_asm.o ================\n";; +print_literal_from_elf "x86_64/mldsa/poly_caddq_avx2_asm.o";; +print_string "==== bytecode end =====================================\n\n";; diff --git a/proofs/hol_light/x86_64/proofs/mldsa_utils.ml b/proofs/hol_light/x86_64/proofs/mldsa_utils.ml index f94a9c970..f52935275 100644 --- a/proofs/hol_light/x86_64/proofs/mldsa_utils.ml +++ b/proofs/hol_light/x86_64/proofs/mldsa_utils.ml @@ -49,7 +49,7 @@ let MAP_UNTIL_TARGET_PC f n = fun (asl, w) -> (* We assume that the goal has the form `ensure arm (\s. ... /\ read PC s = some_value /\ ...)` *) let extract_target_pc_from_goal goal = - let _, insts, _ = term_match [] `eventually arm (\s'. P) some_state` goal in + let _, insts, _ = term_match [] `eventually x86 (\s'. P) some_state` goal in insts |> rev_assoc `P: bool` |> conjuncts diff --git a/proofs/hol_light/x86_64/proofs/poly_caddq_avx2_asm.ml b/proofs/hol_light/x86_64/proofs/poly_caddq_avx2_asm.ml new file mode 100644 index 000000000..95ae194f8 --- /dev/null +++ b/proofs/hol_light/x86_64/proofs/poly_caddq_avx2_asm.ml @@ -0,0 +1,472 @@ +(* + * Copyright (c) The mldsa-native project authors + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +(* ========================================================================= *) +(* Functional correctness of poly_caddq: *) +(* Modular reduction of polynomial coefficients from (-q, q) to [0, q) *) +(* ========================================================================= *) + +needs "s2n_bignum/x86/proofs/base.ml";; +needs "mldsa_native/common/mldsa_specs.ml";; +needs "mldsa_native/x86_64/proofs/mldsa_utils.ml";; + +(**** print_literal_from_elf "x86_64/mldsa/poly_caddq_avx2_asm.o";; + ****) + +let mldsa_caddq_mc = define_assert_from_elf "mldsa_caddq_mc" "x86_64/mldsa/poly_caddq_avx2_asm.o" +(*** BYTECODE START ***) +[ + 0xf3; 0x0f; 0x1e; 0xfa; (* ENDBR64 *) + 0xc5; 0xe9; 0xef; 0xd2; (* VPXOR (%_% xmm2) (%_% xmm2) (%_% xmm2) *) + 0xb8; 0x01; 0xe0; 0x7f; 0x00; + (* MOV (% eax) (Imm32 (word 8380417)) *) + 0xc5; 0xf9; 0x6e; 0xc8; (* VMOVD (%_% xmm1) (% eax) *) + 0xc4; 0xe2; 0x7d; 0x58; 0xc9; + (* VPBROADCASTD (%_% ymm1) (%_% xmm1) *) + 0xc5; 0xed; 0x66; 0x07; (* VPCMPGTD (%_% ymm0) (%_% ymm2) (Memop Word256 (%% (rdi,0))) *) + 0xc5; 0xfd; 0xdb; 0xc1; (* VPAND (%_% ymm0) (%_% ymm0) (%_% ymm1) *) + 0xc5; 0xfd; 0xfe; 0x07; (* VPADDD (%_% ymm0) (%_% ymm0) (Memop Word256 (%% (rdi,0))) *) + 0xc5; 0xfd; 0x7f; 0x07; (* VMOVDQA (Memop Word256 (%% (rdi,0))) (%_% ymm0) *) + 0xc5; 0xed; 0x66; 0x5f; 0x20; + (* VPCMPGTD (%_% ymm3) (%_% ymm2) (Memop Word256 (%% (rdi,32))) *) + 0xc5; 0xe5; 0xdb; 0xd9; (* VPAND (%_% ymm3) (%_% ymm3) (%_% ymm1) *) + 0xc5; 0xe5; 0xfe; 0x5f; 0x20; + (* VPADDD (%_% ymm3) (%_% ymm3) (Memop Word256 (%% (rdi,32))) *) + 0xc5; 0xfd; 0x7f; 0x5f; 0x20; + (* VMOVDQA (Memop Word256 (%% (rdi,32))) (%_% ymm3) *) + 0xc5; 0xed; 0x66; 0x67; 0x40; + (* VPCMPGTD (%_% ymm4) (%_% ymm2) (Memop Word256 (%% (rdi,64))) *) + 0xc5; 0xdd; 0xdb; 0xe1; (* VPAND (%_% ymm4) (%_% ymm4) (%_% ymm1) *) + 0xc5; 0xdd; 0xfe; 0x67; 0x40; + (* VPADDD (%_% ymm4) (%_% ymm4) (Memop Word256 (%% (rdi,64))) *) + 0xc5; 0xfd; 0x7f; 0x67; 0x40; + (* VMOVDQA (Memop Word256 (%% (rdi,64))) (%_% ymm4) *) + 0xc5; 0xed; 0x66; 0x6f; 0x60; + (* VPCMPGTD (%_% ymm5) (%_% ymm2) (Memop Word256 (%% (rdi,96))) *) + 0xc5; 0xd5; 0xdb; 0xe9; (* VPAND (%_% ymm5) (%_% ymm5) (%_% ymm1) *) + 0xc5; 0xd5; 0xfe; 0x6f; 0x60; + (* VPADDD (%_% ymm5) (%_% ymm5) (Memop Word256 (%% (rdi,96))) *) + 0xc5; 0xfd; 0x7f; 0x6f; 0x60; + (* VMOVDQA (Memop Word256 (%% (rdi,96))) (%_% ymm5) *) + 0xc5; 0xed; 0x66; 0x87; 0x80; 0x00; 0x00; 0x00; + (* VPCMPGTD (%_% ymm0) (%_% ymm2) (Memop Word256 (%% (rdi,128))) *) + 0xc5; 0xfd; 0xdb; 0xc1; (* VPAND (%_% ymm0) (%_% ymm0) (%_% ymm1) *) + 0xc5; 0xfd; 0xfe; 0x87; 0x80; 0x00; 0x00; 0x00; + (* VPADDD (%_% ymm0) (%_% ymm0) (Memop Word256 (%% (rdi,128))) *) + 0xc5; 0xfd; 0x7f; 0x87; 0x80; 0x00; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,128))) (%_% ymm0) *) + 0xc5; 0xed; 0x66; 0x9f; 0xa0; 0x00; 0x00; 0x00; + (* VPCMPGTD (%_% ymm3) (%_% ymm2) (Memop Word256 (%% (rdi,160))) *) + 0xc5; 0xe5; 0xdb; 0xd9; (* VPAND (%_% ymm3) (%_% ymm3) (%_% ymm1) *) + 0xc5; 0xe5; 0xfe; 0x9f; 0xa0; 0x00; 0x00; 0x00; + (* VPADDD (%_% ymm3) (%_% ymm3) (Memop Word256 (%% (rdi,160))) *) + 0xc5; 0xfd; 0x7f; 0x9f; 0xa0; 0x00; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,160))) (%_% ymm3) *) + 0xc5; 0xed; 0x66; 0xa7; 0xc0; 0x00; 0x00; 0x00; + (* VPCMPGTD (%_% ymm4) (%_% ymm2) (Memop Word256 (%% (rdi,192))) *) + 0xc5; 0xdd; 0xdb; 0xe1; (* VPAND (%_% ymm4) (%_% ymm4) (%_% ymm1) *) + 0xc5; 0xdd; 0xfe; 0xa7; 0xc0; 0x00; 0x00; 0x00; + (* VPADDD (%_% ymm4) (%_% ymm4) (Memop Word256 (%% (rdi,192))) *) + 0xc5; 0xfd; 0x7f; 0xa7; 0xc0; 0x00; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,192))) (%_% ymm4) *) + 0xc5; 0xed; 0x66; 0xaf; 0xe0; 0x00; 0x00; 0x00; + (* VPCMPGTD (%_% ymm5) (%_% ymm2) (Memop Word256 (%% (rdi,224))) *) + 0xc5; 0xd5; 0xdb; 0xe9; (* VPAND (%_% ymm5) (%_% ymm5) (%_% ymm1) *) + 0xc5; 0xd5; 0xfe; 0xaf; 0xe0; 0x00; 0x00; 0x00; + (* VPADDD (%_% ymm5) (%_% ymm5) (Memop Word256 (%% (rdi,224))) *) + 0xc5; 0xfd; 0x7f; 0xaf; 0xe0; 0x00; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,224))) (%_% ymm5) *) + 0xc5; 0xed; 0x66; 0x87; 0x00; 0x01; 0x00; 0x00; + (* VPCMPGTD (%_% ymm0) (%_% ymm2) (Memop Word256 (%% (rdi,256))) *) + 0xc5; 0xfd; 0xdb; 0xc1; (* VPAND (%_% ymm0) (%_% ymm0) (%_% ymm1) *) + 0xc5; 0xfd; 0xfe; 0x87; 0x00; 0x01; 0x00; 0x00; + (* VPADDD (%_% ymm0) (%_% ymm0) (Memop Word256 (%% (rdi,256))) *) + 0xc5; 0xfd; 0x7f; 0x87; 0x00; 0x01; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,256))) (%_% ymm0) *) + 0xc5; 0xed; 0x66; 0x9f; 0x20; 0x01; 0x00; 0x00; + (* VPCMPGTD (%_% ymm3) (%_% ymm2) (Memop Word256 (%% (rdi,288))) *) + 0xc5; 0xe5; 0xdb; 0xd9; (* VPAND (%_% ymm3) (%_% ymm3) (%_% ymm1) *) + 0xc5; 0xe5; 0xfe; 0x9f; 0x20; 0x01; 0x00; 0x00; + (* VPADDD (%_% ymm3) (%_% ymm3) (Memop Word256 (%% (rdi,288))) *) + 0xc5; 0xfd; 0x7f; 0x9f; 0x20; 0x01; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,288))) (%_% ymm3) *) + 0xc5; 0xed; 0x66; 0xa7; 0x40; 0x01; 0x00; 0x00; + (* VPCMPGTD (%_% ymm4) (%_% ymm2) (Memop Word256 (%% (rdi,320))) *) + 0xc5; 0xdd; 0xdb; 0xe1; (* VPAND (%_% ymm4) (%_% ymm4) (%_% ymm1) *) + 0xc5; 0xdd; 0xfe; 0xa7; 0x40; 0x01; 0x00; 0x00; + (* VPADDD (%_% ymm4) (%_% ymm4) (Memop Word256 (%% (rdi,320))) *) + 0xc5; 0xfd; 0x7f; 0xa7; 0x40; 0x01; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,320))) (%_% ymm4) *) + 0xc5; 0xed; 0x66; 0xaf; 0x60; 0x01; 0x00; 0x00; + (* VPCMPGTD (%_% ymm5) (%_% ymm2) (Memop Word256 (%% (rdi,352))) *) + 0xc5; 0xd5; 0xdb; 0xe9; (* VPAND (%_% ymm5) (%_% ymm5) (%_% ymm1) *) + 0xc5; 0xd5; 0xfe; 0xaf; 0x60; 0x01; 0x00; 0x00; + (* VPADDD (%_% ymm5) (%_% ymm5) (Memop Word256 (%% (rdi,352))) *) + 0xc5; 0xfd; 0x7f; 0xaf; 0x60; 0x01; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,352))) (%_% ymm5) *) + 0xc5; 0xed; 0x66; 0x87; 0x80; 0x01; 0x00; 0x00; + (* VPCMPGTD (%_% ymm0) (%_% ymm2) (Memop Word256 (%% (rdi,384))) *) + 0xc5; 0xfd; 0xdb; 0xc1; (* VPAND (%_% ymm0) (%_% ymm0) (%_% ymm1) *) + 0xc5; 0xfd; 0xfe; 0x87; 0x80; 0x01; 0x00; 0x00; + (* VPADDD (%_% ymm0) (%_% ymm0) (Memop Word256 (%% (rdi,384))) *) + 0xc5; 0xfd; 0x7f; 0x87; 0x80; 0x01; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,384))) (%_% ymm0) *) + 0xc5; 0xed; 0x66; 0x9f; 0xa0; 0x01; 0x00; 0x00; + (* VPCMPGTD (%_% ymm3) (%_% ymm2) (Memop Word256 (%% (rdi,416))) *) + 0xc5; 0xe5; 0xdb; 0xd9; (* VPAND (%_% ymm3) (%_% ymm3) (%_% ymm1) *) + 0xc5; 0xe5; 0xfe; 0x9f; 0xa0; 0x01; 0x00; 0x00; + (* VPADDD (%_% ymm3) (%_% ymm3) (Memop Word256 (%% (rdi,416))) *) + 0xc5; 0xfd; 0x7f; 0x9f; 0xa0; 0x01; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,416))) (%_% ymm3) *) + 0xc5; 0xed; 0x66; 0xa7; 0xc0; 0x01; 0x00; 0x00; + (* VPCMPGTD (%_% ymm4) (%_% ymm2) (Memop Word256 (%% (rdi,448))) *) + 0xc5; 0xdd; 0xdb; 0xe1; (* VPAND (%_% ymm4) (%_% ymm4) (%_% ymm1) *) + 0xc5; 0xdd; 0xfe; 0xa7; 0xc0; 0x01; 0x00; 0x00; + (* VPADDD (%_% ymm4) (%_% ymm4) (Memop Word256 (%% (rdi,448))) *) + 0xc5; 0xfd; 0x7f; 0xa7; 0xc0; 0x01; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,448))) (%_% ymm4) *) + 0xc5; 0xed; 0x66; 0xaf; 0xe0; 0x01; 0x00; 0x00; + (* VPCMPGTD (%_% ymm5) (%_% ymm2) (Memop Word256 (%% (rdi,480))) *) + 0xc5; 0xd5; 0xdb; 0xe9; (* VPAND (%_% ymm5) (%_% ymm5) (%_% ymm1) *) + 0xc5; 0xd5; 0xfe; 0xaf; 0xe0; 0x01; 0x00; 0x00; + (* VPADDD (%_% ymm5) (%_% ymm5) (Memop Word256 (%% (rdi,480))) *) + 0xc5; 0xfd; 0x7f; 0xaf; 0xe0; 0x01; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,480))) (%_% ymm5) *) + 0xc5; 0xed; 0x66; 0x87; 0x00; 0x02; 0x00; 0x00; + (* VPCMPGTD (%_% ymm0) (%_% ymm2) (Memop Word256 (%% (rdi,512))) *) + 0xc5; 0xfd; 0xdb; 0xc1; (* VPAND (%_% ymm0) (%_% ymm0) (%_% ymm1) *) + 0xc5; 0xfd; 0xfe; 0x87; 0x00; 0x02; 0x00; 0x00; + (* VPADDD (%_% ymm0) (%_% ymm0) (Memop Word256 (%% (rdi,512))) *) + 0xc5; 0xfd; 0x7f; 0x87; 0x00; 0x02; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,512))) (%_% ymm0) *) + 0xc5; 0xed; 0x66; 0x9f; 0x20; 0x02; 0x00; 0x00; + (* VPCMPGTD (%_% ymm3) (%_% ymm2) (Memop Word256 (%% (rdi,544))) *) + 0xc5; 0xe5; 0xdb; 0xd9; (* VPAND (%_% ymm3) (%_% ymm3) (%_% ymm1) *) + 0xc5; 0xe5; 0xfe; 0x9f; 0x20; 0x02; 0x00; 0x00; + (* VPADDD (%_% ymm3) (%_% ymm3) (Memop Word256 (%% (rdi,544))) *) + 0xc5; 0xfd; 0x7f; 0x9f; 0x20; 0x02; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,544))) (%_% ymm3) *) + 0xc5; 0xed; 0x66; 0xa7; 0x40; 0x02; 0x00; 0x00; + (* VPCMPGTD (%_% ymm4) (%_% ymm2) (Memop Word256 (%% (rdi,576))) *) + 0xc5; 0xdd; 0xdb; 0xe1; (* VPAND (%_% ymm4) (%_% ymm4) (%_% ymm1) *) + 0xc5; 0xdd; 0xfe; 0xa7; 0x40; 0x02; 0x00; 0x00; + (* VPADDD (%_% ymm4) (%_% ymm4) (Memop Word256 (%% (rdi,576))) *) + 0xc5; 0xfd; 0x7f; 0xa7; 0x40; 0x02; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,576))) (%_% ymm4) *) + 0xc5; 0xed; 0x66; 0xaf; 0x60; 0x02; 0x00; 0x00; + (* VPCMPGTD (%_% ymm5) (%_% ymm2) (Memop Word256 (%% (rdi,608))) *) + 0xc5; 0xd5; 0xdb; 0xe9; (* VPAND (%_% ymm5) (%_% ymm5) (%_% ymm1) *) + 0xc5; 0xd5; 0xfe; 0xaf; 0x60; 0x02; 0x00; 0x00; + (* VPADDD (%_% ymm5) (%_% ymm5) (Memop Word256 (%% (rdi,608))) *) + 0xc5; 0xfd; 0x7f; 0xaf; 0x60; 0x02; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,608))) (%_% ymm5) *) + 0xc5; 0xed; 0x66; 0x87; 0x80; 0x02; 0x00; 0x00; + (* VPCMPGTD (%_% ymm0) (%_% ymm2) (Memop Word256 (%% (rdi,640))) *) + 0xc5; 0xfd; 0xdb; 0xc1; (* VPAND (%_% ymm0) (%_% ymm0) (%_% ymm1) *) + 0xc5; 0xfd; 0xfe; 0x87; 0x80; 0x02; 0x00; 0x00; + (* VPADDD (%_% ymm0) (%_% ymm0) (Memop Word256 (%% (rdi,640))) *) + 0xc5; 0xfd; 0x7f; 0x87; 0x80; 0x02; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,640))) (%_% ymm0) *) + 0xc5; 0xed; 0x66; 0x9f; 0xa0; 0x02; 0x00; 0x00; + (* VPCMPGTD (%_% ymm3) (%_% ymm2) (Memop Word256 (%% (rdi,672))) *) + 0xc5; 0xe5; 0xdb; 0xd9; (* VPAND (%_% ymm3) (%_% ymm3) (%_% ymm1) *) + 0xc5; 0xe5; 0xfe; 0x9f; 0xa0; 0x02; 0x00; 0x00; + (* VPADDD (%_% ymm3) (%_% ymm3) (Memop Word256 (%% (rdi,672))) *) + 0xc5; 0xfd; 0x7f; 0x9f; 0xa0; 0x02; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,672))) (%_% ymm3) *) + 0xc5; 0xed; 0x66; 0xa7; 0xc0; 0x02; 0x00; 0x00; + (* VPCMPGTD (%_% ymm4) (%_% ymm2) (Memop Word256 (%% (rdi,704))) *) + 0xc5; 0xdd; 0xdb; 0xe1; (* VPAND (%_% ymm4) (%_% ymm4) (%_% ymm1) *) + 0xc5; 0xdd; 0xfe; 0xa7; 0xc0; 0x02; 0x00; 0x00; + (* VPADDD (%_% ymm4) (%_% ymm4) (Memop Word256 (%% (rdi,704))) *) + 0xc5; 0xfd; 0x7f; 0xa7; 0xc0; 0x02; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,704))) (%_% ymm4) *) + 0xc5; 0xed; 0x66; 0xaf; 0xe0; 0x02; 0x00; 0x00; + (* VPCMPGTD (%_% ymm5) (%_% ymm2) (Memop Word256 (%% (rdi,736))) *) + 0xc5; 0xd5; 0xdb; 0xe9; (* VPAND (%_% ymm5) (%_% ymm5) (%_% ymm1) *) + 0xc5; 0xd5; 0xfe; 0xaf; 0xe0; 0x02; 0x00; 0x00; + (* VPADDD (%_% ymm5) (%_% ymm5) (Memop Word256 (%% (rdi,736))) *) + 0xc5; 0xfd; 0x7f; 0xaf; 0xe0; 0x02; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,736))) (%_% ymm5) *) + 0xc5; 0xed; 0x66; 0x87; 0x00; 0x03; 0x00; 0x00; + (* VPCMPGTD (%_% ymm0) (%_% ymm2) (Memop Word256 (%% (rdi,768))) *) + 0xc5; 0xfd; 0xdb; 0xc1; (* VPAND (%_% ymm0) (%_% ymm0) (%_% ymm1) *) + 0xc5; 0xfd; 0xfe; 0x87; 0x00; 0x03; 0x00; 0x00; + (* VPADDD (%_% ymm0) (%_% ymm0) (Memop Word256 (%% (rdi,768))) *) + 0xc5; 0xfd; 0x7f; 0x87; 0x00; 0x03; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,768))) (%_% ymm0) *) + 0xc5; 0xed; 0x66; 0x9f; 0x20; 0x03; 0x00; 0x00; + (* VPCMPGTD (%_% ymm3) (%_% ymm2) (Memop Word256 (%% (rdi,800))) *) + 0xc5; 0xe5; 0xdb; 0xd9; (* VPAND (%_% ymm3) (%_% ymm3) (%_% ymm1) *) + 0xc5; 0xe5; 0xfe; 0x9f; 0x20; 0x03; 0x00; 0x00; + (* VPADDD (%_% ymm3) (%_% ymm3) (Memop Word256 (%% (rdi,800))) *) + 0xc5; 0xfd; 0x7f; 0x9f; 0x20; 0x03; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,800))) (%_% ymm3) *) + 0xc5; 0xed; 0x66; 0xa7; 0x40; 0x03; 0x00; 0x00; + (* VPCMPGTD (%_% ymm4) (%_% ymm2) (Memop Word256 (%% (rdi,832))) *) + 0xc5; 0xdd; 0xdb; 0xe1; (* VPAND (%_% ymm4) (%_% ymm4) (%_% ymm1) *) + 0xc5; 0xdd; 0xfe; 0xa7; 0x40; 0x03; 0x00; 0x00; + (* VPADDD (%_% ymm4) (%_% ymm4) (Memop Word256 (%% (rdi,832))) *) + 0xc5; 0xfd; 0x7f; 0xa7; 0x40; 0x03; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,832))) (%_% ymm4) *) + 0xc5; 0xed; 0x66; 0xaf; 0x60; 0x03; 0x00; 0x00; + (* VPCMPGTD (%_% ymm5) (%_% ymm2) (Memop Word256 (%% (rdi,864))) *) + 0xc5; 0xd5; 0xdb; 0xe9; (* VPAND (%_% ymm5) (%_% ymm5) (%_% ymm1) *) + 0xc5; 0xd5; 0xfe; 0xaf; 0x60; 0x03; 0x00; 0x00; + (* VPADDD (%_% ymm5) (%_% ymm5) (Memop Word256 (%% (rdi,864))) *) + 0xc5; 0xfd; 0x7f; 0xaf; 0x60; 0x03; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,864))) (%_% ymm5) *) + 0xc5; 0xed; 0x66; 0x87; 0x80; 0x03; 0x00; 0x00; + (* VPCMPGTD (%_% ymm0) (%_% ymm2) (Memop Word256 (%% (rdi,896))) *) + 0xc5; 0xfd; 0xdb; 0xc1; (* VPAND (%_% ymm0) (%_% ymm0) (%_% ymm1) *) + 0xc5; 0xfd; 0xfe; 0x87; 0x80; 0x03; 0x00; 0x00; + (* VPADDD (%_% ymm0) (%_% ymm0) (Memop Word256 (%% (rdi,896))) *) + 0xc5; 0xfd; 0x7f; 0x87; 0x80; 0x03; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,896))) (%_% ymm0) *) + 0xc5; 0xed; 0x66; 0x9f; 0xa0; 0x03; 0x00; 0x00; + (* VPCMPGTD (%_% ymm3) (%_% ymm2) (Memop Word256 (%% (rdi,928))) *) + 0xc5; 0xe5; 0xdb; 0xd9; (* VPAND (%_% ymm3) (%_% ymm3) (%_% ymm1) *) + 0xc5; 0xe5; 0xfe; 0x9f; 0xa0; 0x03; 0x00; 0x00; + (* VPADDD (%_% ymm3) (%_% ymm3) (Memop Word256 (%% (rdi,928))) *) + 0xc5; 0xfd; 0x7f; 0x9f; 0xa0; 0x03; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,928))) (%_% ymm3) *) + 0xc5; 0xed; 0x66; 0xa7; 0xc0; 0x03; 0x00; 0x00; + (* VPCMPGTD (%_% ymm4) (%_% ymm2) (Memop Word256 (%% (rdi,960))) *) + 0xc5; 0xdd; 0xdb; 0xe1; (* VPAND (%_% ymm4) (%_% ymm4) (%_% ymm1) *) + 0xc5; 0xdd; 0xfe; 0xa7; 0xc0; 0x03; 0x00; 0x00; + (* VPADDD (%_% ymm4) (%_% ymm4) (Memop Word256 (%% (rdi,960))) *) + 0xc5; 0xfd; 0x7f; 0xa7; 0xc0; 0x03; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,960))) (%_% ymm4) *) + 0xc5; 0xed; 0x66; 0xaf; 0xe0; 0x03; 0x00; 0x00; + (* VPCMPGTD (%_% ymm5) (%_% ymm2) (Memop Word256 (%% (rdi,992))) *) + 0xc5; 0xd5; 0xdb; 0xe9; (* VPAND (%_% ymm5) (%_% ymm5) (%_% ymm1) *) + 0xc5; 0xd5; 0xfe; 0xaf; 0xe0; 0x03; 0x00; 0x00; + (* VPADDD (%_% ymm5) (%_% ymm5) (Memop Word256 (%% (rdi,992))) *) + 0xc5; 0xfd; 0x7f; 0xaf; 0xe0; 0x03; 0x00; 0x00; + (* VMOVDQA (Memop Word256 (%% (rdi,992))) (%_% ymm5) *) + 0xc3 (* RET *) +];; +(*** BYTECODE END ***) + +let mldsa_caddq_tmc = define_trimmed "mldsa_caddq_tmc" mldsa_caddq_mc;; +let MLDSA_CADDQ_TMC_EXEC = X86_MK_CORE_EXEC_RULE mldsa_caddq_tmc;; + +(* ------------------------------------------------------------------------- *) +(* Functional specification of mldsa_caddq *) +(* ------------------------------------------------------------------------- *) + +let mldsa_caddq = define + `(mldsa_caddq:int32->int32) x = + word_add x + (word_and + (if word_igt (word 0:int32) x then word 0xffffffff else word 0) + (word 8380417))`;; + +let mldsa_caddq_direct = prove + (`!x:int32. + ival(mldsa_caddq x) = mldsa_caddq_spec(ival x)`, + REWRITE_TAC[mldsa_caddq; mldsa_caddq_spec] THEN BITBLAST_TAC);; + +(* ------------------------------------------------------------------------- *) +(* Core correctness theorem *) +(* ------------------------------------------------------------------------- *) + +let MLDSA_CADDQ_CORRECT = time prove + (`!a x pc. + aligned 32 a /\ + nonoverlapping (word pc,876) (a, 1024) + ==> ensures x86 + (\s. bytes_loaded s (word pc) (BUTLAST mldsa_caddq_tmc) /\ + read RIP s = word pc /\ + C_ARGUMENTS [a] s /\ + (!i. i < 256 + ==> read(memory :> bytes32(word_add a (word(4 * i)))) s = + x i) /\ + (!i. i < 256 ==> abs(ival(x i)) < &8380417)) + (\s. read RIP s = word(pc + 875) /\ + (!i. i < 256 + ==> ival(read(memory :> bytes32 + (word_add a (word(4 * i)))) s) = + ival(x i) rem &8380417) /\ + (!i. i < 256 + ==> &0 <= ival(read(memory :> bytes32 + (word_add a (word(4 * i)))) s) /\ + ival(read(memory :> bytes32 + (word_add a (word(4 * i)))) s) < &8380417)) + (MAYCHANGE [RIP] ,, MAYCHANGE [events] ,, + MAYCHANGE [ZMM0; ZMM1; ZMM2; ZMM3; ZMM4; ZMM5] ,, + MAYCHANGE [RAX] ,, MAYCHANGE SOME_FLAGS ,, + MAYCHANGE [memory :> bytes(a,1024)])`, + + MAP_EVERY X_GEN_TAC [`a:int64`; `x:num->int32`; `pc:num`] THEN + REWRITE_TAC[NONOVERLAPPING_CLAUSES; C_ARGUMENTS; fst MLDSA_CADDQ_TMC_EXEC] THEN + DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN + CONV_TAC(RATOR_CONV(LAND_CONV(ONCE_DEPTH_CONV EXPAND_CASES_CONV))) THEN + CONV_TAC NUM_REDUCE_CONV THEN REPEAT STRIP_TAC THEN + REWRITE_TAC [SOME_FLAGS; fst MLDSA_CADDQ_TMC_EXEC] THEN + GHOST_INTRO_TAC `init_ymm0:int256` `read YMM0` THEN + GHOST_INTRO_TAC `init_ymm1:int256` `read YMM1` THEN + ENSURES_INIT_TAC "s0" THEN + MP_TAC(end_itlist CONJ (map (fun n -> READ_MEMORY_MERGE_CONV 3 + (subst[mk_small_numeral(32*n),`n:num`] + `read (memory :> bytes256(word_add a (word n))) s0`)) + (0--31))) THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN + DISCARD_MATCHING_ASSUMPTIONS [`read (memory :> bytes32 a) s = x`] THEN + STRIP_TAC THEN + MAP_EVERY (fun n -> + X86_STEPS_TAC MLDSA_CADDQ_TMC_EXEC [n] THEN + SIMD_SIMPLIFY_TAC[mldsa_caddq]) + (1--132) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + REPEAT(FIRST_X_ASSUM(STRIP_ASSUME_TAC o + CONV_RULE(SIMD_SIMPLIFY_CONV[mldsa_caddq]) o + CONV_RULE(READ_MEMORY_SPLIT_CONV 3) o + check (can (term_match [] `read qqq s132:int256 = xxx`) o concl))) THEN + CONV_TAC(ONCE_DEPTH_CONV EXPAND_CASES_CONV THENC + ONCE_DEPTH_CONV NUM_MULT_CONV) THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN + ONCE_REWRITE_TAC[WORD_ADD_SYM] THEN + REWRITE_TAC[GSYM mldsa_caddq] THEN + DISCARD_NONMATCHING_ASSUMPTIONS [`abs (ival t) < &8380417`] THEN + REWRITE_TAC[mldsa_caddq_direct] THEN + REPEAT CONJ_TAC THEN + TRY(MATCH_MP_TAC MLDSA_CADDQ_SPEC_REM) THEN + TRY(MATCH_MP_TAC MLDSA_CADDQ_SPEC_LOWER) THEN + TRY(MATCH_MP_TAC MLDSA_CADDQ_SPEC_UPPER) THEN + ASM_REWRITE_TAC[]);; + +(* ------------------------------------------------------------------------- *) +(* Subroutine correctness theorem (includes return) *) +(* NOTE: This must be kept in sync with the CBMC specification *) +(* in mldsa/src/native/x86_64/src/arith_native_x86_64.h *) +(* ------------------------------------------------------------------------- *) + +let MLDSA_CADDQ_NOIBT_SUBROUTINE_CORRECT = prove + (`!a x pc stackpointer returnaddress. + aligned 32 a /\ + nonoverlapping (word pc,LENGTH mldsa_caddq_tmc) (a,1024) /\ + nonoverlapping (stackpointer,8) (a,1024) + ==> ensures x86 + (\s. bytes_loaded s (word pc) mldsa_caddq_tmc /\ + read RIP s = word pc /\ + read RSP s = stackpointer /\ + read (memory :> bytes64 stackpointer) s = returnaddress /\ + C_ARGUMENTS [a] s /\ + (!i. i < 256 + ==> read(memory :> bytes32(word_add a (word(4 * i)))) s = + x i) /\ + (!i. i < 256 ==> abs(ival(x i)) < &8380417)) + (\s. read RIP s = returnaddress /\ + read RSP s = word_add stackpointer (word 8) /\ + (!i. i < 256 + ==> ival(read(memory :> bytes32 + (word_add a (word(4 * i)))) s) = + ival(x i) rem &8380417) /\ + (!i. i < 256 + ==> &0 <= ival(read(memory :> bytes32 + (word_add a (word(4 * i)))) s) /\ + ival(read(memory :> bytes32 + (word_add a (word(4 * i)))) s) < &8380417)) + (MAYCHANGE [RSP] ,, MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(a,1024)])`, + X86_PROMOTE_RETURN_NOSTACK_TAC mldsa_caddq_tmc MLDSA_CADDQ_CORRECT);; + +let MLDSA_CADDQ_SUBROUTINE_CORRECT = prove + (`!a x pc stackpointer returnaddress. + aligned 32 a /\ + nonoverlapping (word pc,LENGTH mldsa_caddq_mc) (a,1024) /\ + nonoverlapping (stackpointer,8) (a,1024) + ==> ensures x86 + (\s. bytes_loaded s (word pc) mldsa_caddq_mc /\ + read RIP s = word pc /\ + read RSP s = stackpointer /\ + read (memory :> bytes64 stackpointer) s = returnaddress /\ + C_ARGUMENTS [a] s /\ + (!i. i < 256 + ==> read(memory :> bytes32(word_add a (word(4 * i)))) s = + x i) /\ + (!i. i < 256 ==> abs(ival(x i)) < &8380417)) + (\s. read RIP s = returnaddress /\ + read RSP s = word_add stackpointer (word 8) /\ + (!i. i < 256 + ==> ival(read(memory :> bytes32 + (word_add a (word(4 * i)))) s) = + ival(x i) rem &8380417) /\ + (!i. i < 256 + ==> &0 <= ival(read(memory :> bytes32 + (word_add a (word(4 * i)))) s) /\ + ival(read(memory :> bytes32 + (word_add a (word(4 * i)))) s) < &8380417)) + (MAYCHANGE [RSP] ,, MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(a,1024)])`, + MATCH_ACCEPT_TAC(ADD_IBT_RULE MLDSA_CADDQ_NOIBT_SUBROUTINE_CORRECT));; + +(* ========================================================================= *) +(* Constant-time and memory safety proof. *) +(* ========================================================================= *) + +needs "s2n_bignum/x86/proofs/consttime.ml";; +needs "mldsa_native/x86_64/proofs/subroutine_signatures.ml";; + +let full_spec,public_vars = mk_safety_spec + ~keep_maychanges:true + (assoc "mldsa_poly_caddq_x86" subroutine_signatures) + (REWRITE_RULE[SOME_FLAGS] MLDSA_CADDQ_CORRECT) + MLDSA_CADDQ_TMC_EXEC;; + +let MLDSA_CADDQ_SAFE = time prove + (full_spec, + PROVE_SAFETY_SPEC_TAC ~public_vars:public_vars MLDSA_CADDQ_TMC_EXEC);; + +let MLDSA_CADDQ_NOIBT_SUBROUTINE_SAFE = time prove + (`exists f_events. + forall e a pc stackpointer returnaddress. + aligned 32 a /\ + nonoverlapping (word pc, LENGTH mldsa_caddq_tmc) (a, 1024) /\ + nonoverlapping (stackpointer, 8) (a, 1024) + ==> ensures x86 + (\s. + bytes_loaded s (word pc) mldsa_caddq_tmc /\ + read RIP s = word pc /\ + read RSP s = stackpointer /\ + read (memory :> bytes64 stackpointer) s = returnaddress /\ + C_ARGUMENTS [a] s /\ + read events s = e) + (\s. read RIP s = returnaddress /\ + read RSP s = word_add stackpointer (word 8) /\ + (exists e2. + read events s = APPEND e2 e /\ + e2 = f_events a pc stackpointer returnaddress /\ + memaccess_inbounds e2 [a,1024; stackpointer,8] + [a,1024; stackpointer,8])) + (\s s'. true)`, + X86_PROMOTE_RETURN_NOSTACK_TAC mldsa_caddq_tmc MLDSA_CADDQ_SAFE THEN + DISCHARGE_SAFETY_PROPERTY_TAC);; + +let MLDSA_CADDQ_SUBROUTINE_SAFE = time prove + (`exists f_events. + forall e a pc stackpointer returnaddress. + aligned 32 a /\ + nonoverlapping (word pc, LENGTH mldsa_caddq_mc) (a, 1024) /\ + nonoverlapping (stackpointer, 8) (a, 1024) + ==> ensures x86 + (\s. + bytes_loaded s (word pc) mldsa_caddq_mc /\ + read RIP s = word pc /\ + read RSP s = stackpointer /\ + read (memory :> bytes64 stackpointer) s = returnaddress /\ + C_ARGUMENTS [a] s /\ + read events s = e) + (\s. read RIP s = returnaddress /\ + read RSP s = word_add stackpointer (word 8) /\ + (exists e2. + read events s = APPEND e2 e /\ + e2 = f_events a pc stackpointer returnaddress /\ + memaccess_inbounds e2 [a,1024; stackpointer,8] + [a,1024; stackpointer,8])) + (\s s'. true)`, + MATCH_ACCEPT_TAC(ADD_IBT_RULE MLDSA_CADDQ_NOIBT_SUBROUTINE_SAFE));; diff --git a/proofs/hol_light/x86_64/proofs/subroutine_signatures.ml b/proofs/hol_light/x86_64/proofs/subroutine_signatures.ml index 06a599beb..8b32e6559 100644 --- a/proofs/hol_light/x86_64/proofs/subroutine_signatures.ml +++ b/proofs/hol_light/x86_64/proofs/subroutine_signatures.ml @@ -10,6 +10,21 @@ (* ========================================================================= *) let subroutine_signatures = [ +("mldsa_poly_caddq_x86", + ([(*args*) + ("a", "int32_t[static 256]", (*is const?*)"false"); + ], + "void", + [(* input buffers *) + ("a", "256"(* num elems *), 4(* elem bytesize *)); + ], + [(* output buffers *) + ("a", "256"(* num elems *), 4(* elem bytesize *)); + ], + [(* temporary buffers *) + ]) +); + ("mldsa_pointwise_x86", ([(*args*) ("a", "int32_t[static 256]", (*is const?*)"false"); diff --git a/scripts/autogen b/scripts/autogen index e958117f5..fb2bfca72 100755 --- a/scripts/autogen +++ b/scripts/autogen @@ -2734,6 +2734,12 @@ def gen_hol_light_asm(): x86_64_flags = "-mavx2 -mbmi2 -msse4 -fcf-protection=full" joblist_x86_64 = [ + ( + "poly_caddq_avx2_asm.S", + "dev/x86_64/src", + f"-Imldsa/src/native/x86_64/src -Imldsa/src/common.h {x86_64_flags}", + "x86_64", + ), ( "ntt_avx2_asm.S", "dev/x86_64/src",