From 8cd4a46c1aee3417daf778c3f9f30f058d78c5d1 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Wed, 25 Feb 2026 08:09:09 +0000 Subject: [PATCH 1/6] Add generic-size bignum_mod_p256k1 and bignum_mod_n256k1 for Arm Add bignum_mod_p256k1 (field characteristic reduction) and bignum_mod_n256k1 (group order reduction) for secp256k1, supporting arbitrary input sizes via iterative long division. Includes ARM assembly, HOL Light formal proofs, tests, and benchmark hooks. Co-Authored-By: Claude Opus 4.6 --- arm/Makefile | 2 + arm/proofs/bignum_mod_n256k1.ml | 419 ++++++++++++++++++++++++++++ arm/proofs/bignum_mod_p256k1.ml | 391 ++++++++++++++++++++++++++ arm/proofs/subroutine_signatures.ml | 34 +++ arm/secp256k1/Makefile | 2 + arm/secp256k1/bignum_mod_n256k1.S | 172 ++++++++++++ arm/secp256k1/bignum_mod_p256k1.S | 154 ++++++++++ benchmarks/benchmark.c | 4 + include/s2n-bignum-c89.h | 8 + include/s2n-bignum.h | 8 + tests/test.c | 58 ++++ tools/collect-signatures.py | 2 + 12 files changed, 1254 insertions(+) create mode 100644 arm/proofs/bignum_mod_n256k1.ml create mode 100644 arm/proofs/bignum_mod_p256k1.ml create mode 100644 arm/secp256k1/bignum_mod_n256k1.S create mode 100644 arm/secp256k1/bignum_mod_p256k1.S diff --git a/arm/Makefile b/arm/Makefile index 210da51fc..df1d2cf00 100644 --- a/arm/Makefile +++ b/arm/Makefile @@ -342,7 +342,9 @@ BIGNUM_OBJ = curve25519/bignum_add_p25519.o \ secp256k1/bignum_demont_p256k1.o \ secp256k1/bignum_double_p256k1.o \ secp256k1/bignum_half_p256k1.o \ + secp256k1/bignum_mod_n256k1.o \ secp256k1/bignum_mod_n256k1_4.o \ + secp256k1/bignum_mod_p256k1.o \ secp256k1/bignum_mod_p256k1_4.o \ secp256k1/bignum_montmul_p256k1.o \ secp256k1/bignum_montmul_p256k1_alt.o \ diff --git a/arm/proofs/bignum_mod_n256k1.ml b/arm/proofs/bignum_mod_n256k1.ml new file mode 100644 index 000000000..0c47b10dc --- /dev/null +++ b/arm/proofs/bignum_mod_n256k1.ml @@ -0,0 +1,419 @@ +(* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +(* ========================================================================= *) +(* Reduction modulo n_256k1, the order of the secp256k1 curve. *) +(* ========================================================================= *) + +needs "arm/proofs/base.ml";; + +(**** print_literal_from_elf "arm/secp256k1/bignum_mod_n256k1.o";; + ****) + +let bignum_mod_n256k1_mc = + define_assert_from_elf "bignum_mod_n256k1_mc" "arm/secp256k1/bignum_mod_n256k1.o" +[ + 0xf100103f; (* arm_CMP X1 (rvalue (word 4)) *) + 0x540006a3; (* arm_BCC (word 212) *) + 0xd1001021; (* arm_SUB X1 X1 (rvalue (word 4)) *) + 0xd37df027; (* arm_LSL X7 X1 (rvalue (word 3)) *) + 0x8b0200e7; (* arm_ADD X7 X7 X2 *) + 0xa94118e5; (* arm_LDP X5 X6 X7 (Immediate_Offset (iword (&16))) *) + 0xa94010e3; (* arm_LDP X3 X4 X7 (Immediate_Offset (iword (&0))) *) + 0xd297d7ec; (* arm_MOV X12 (rvalue (word 48831)) *) + 0xf2a5f92c; (* arm_MOVK X12 (word 12233) 16 *) + 0xf2d42e6c; (* arm_MOVK X12 (word 41331) 32 *) + 0xf2e805ac; (* arm_MOVK X12 (word 16429) 48 *) + 0xd28bf88d; (* arm_MOV X13 (rvalue (word 24516)) *) + 0xf2aa16ed; (* arm_MOVK X13 (word 20663) 16 *) + 0xf2c4632d; (* arm_MOVK X13 (word 8985) 32 *) + 0xf2e8aa2d; (* arm_MOVK X13 (word 17745) 48 *) + 0xab0c0067; (* arm_ADDS X7 X3 X12 *) + 0xba0d0088; (* arm_ADCS X8 X4 X13 *) + 0xd280002b; (* arm_MOV X11 (rvalue (word 1)) *) + 0xba0b00a9; (* arm_ADCS X9 X5 X11 *) + 0xba1f00ca; (* arm_ADCS X10 X6 XZR *) + 0x9a873063; (* arm_CSEL X3 X3 X7 Condition_CC *) + 0x9a883084; (* arm_CSEL X4 X4 X8 Condition_CC *) + 0x9a8930a5; (* arm_CSEL X5 X5 X9 Condition_CC *) + 0x9a8a30c6; (* arm_CSEL X6 X6 X10 Condition_CC *) + 0xb4000361; (* arm_CBZ X1 (word 108) *) + 0xb10004ce; (* arm_ADDS X14 X6 (rvalue (word 1)) *) + 0xda9f31ce; (* arm_CSINV X14 X14 XZR Condition_CC *) + 0x9b0e7d87; (* arm_MUL X7 X12 X14 *) + 0x9b0e7da8; (* arm_MUL X8 X13 X14 *) + 0x9bce7d89; (* arm_UMULH X9 X12 X14 *) + 0xab090108; (* arm_ADDS X8 X8 X9 *) + 0x9bce7da9; (* arm_UMULH X9 X13 X14 *) + 0x9a1f0129; (* arm_ADC X9 X9 XZR *) + 0xab0e0129; (* arm_ADDS X9 X9 X14 *) + 0x9a1f03ea; (* arm_ADC X10 XZR XZR *) + 0xcb0e00c6; (* arm_SUB X6 X6 X14 *) + 0xd1000421; (* arm_SUB X1 X1 (rvalue (word 1)) *) + 0xf861784e; (* arm_LDR X14 X2 (Shiftreg_Offset X1 3) *) + 0xab0701c7; (* arm_ADDS X7 X14 X7 *) + 0xba080068; (* arm_ADCS X8 X3 X8 *) + 0xba090089; (* arm_ADCS X9 X4 X9 *) + 0xba0a00aa; (* arm_ADCS X10 X5 X10 *) + 0x9a1f00cb; (* arm_ADC X11 X6 XZR *) + 0x8a0c016e; (* arm_AND X14 X11 X12 *) + 0xeb0e00e3; (* arm_SUBS X3 X7 X14 *) + 0x8a0d016e; (* arm_AND X14 X11 X13 *) + 0xfa0e0104; (* arm_SBCS X4 X8 X14 *) + 0x9240016e; (* arm_AND X14 X11 (rvalue (word 1)) *) + 0xfa0e0125; (* arm_SBCS X5 X9 X14 *) + 0xda1f0146; (* arm_SBC X6 X10 XZR *) + 0xb5fffce1; (* arm_CBNZ X1 (word 2097052) *) + 0xa9001003; (* arm_STP X3 X4 X0 (Immediate_Offset (iword (&0))) *) + 0xa9011805; (* arm_STP X5 X6 X0 (Immediate_Offset (iword (&16))) *) + 0xd65f03c0; (* arm_RET X30 *) + 0xaa1f03e3; (* arm_MOV X3 XZR *) + 0xaa1f03e4; (* arm_MOV X4 XZR *) + 0xaa1f03e5; (* arm_MOV X5 XZR *) + 0xaa1f03e6; (* arm_MOV X6 XZR *) + 0xb4ffff21; (* arm_CBZ X1 (word 2097124) *) + 0xf9400043; (* arm_LDR X3 X2 (Immediate_Offset (word 0)) *) + 0xf1000421; (* arm_SUBS X1 X1 (rvalue (word 1)) *) + 0x54fffec0; (* arm_BEQ (word 2097112) *) + 0xf9400444; (* arm_LDR X4 X2 (Immediate_Offset (word 8)) *) + 0xf1000421; (* arm_SUBS X1 X1 (rvalue (word 1)) *) + 0x54fffe60; (* arm_BEQ (word 2097100) *) + 0xf9400845; (* arm_LDR X5 X2 (Immediate_Offset (word 16)) *) + 0x17fffff1 (* arm_B (word 268435396) *) +];; + +let BIGNUM_MOD_N256K1_EXEC = ARM_MK_EXEC_RULE bignum_mod_n256k1_mc;; + +(* ------------------------------------------------------------------------- *) +(* Proof. *) +(* ------------------------------------------------------------------------- *) + +let n_256k1 = new_definition `n_256k1 = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141`;; + +let n256k1longredlemma = prove + (`!n. n < 2 EXP 64 * n_256k1 + ==> let q = MIN (n DIV 2 EXP 256 + 1) (2 EXP 64 - 1) in + q < 2 EXP 64 /\ + q * n_256k1 <= n + n_256k1 /\ + n < q * n_256k1 + n_256k1`, + CONV_TAC(TOP_DEPTH_CONV let_CONV) THEN REWRITE_TAC[n_256k1] THEN ARITH_TAC);; + +let BIGNUM_MOD_N256K1_CORRECT = time prove + (`!z k x n pc. + nonoverlapping (word pc,0x10c) (z,32) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_mod_n256k1_mc /\ + read PC s = word pc /\ + C_ARGUMENTS [z; k; x] s /\ + bignum_from_memory (x,val k) s = n) + (\s. read PC s = word (pc + 0xd4) /\ + bignum_from_memory (z,4) s = n MOD n_256k1) + (MAYCHANGE [PC; X1; X3; X4; X5; X6; X7; X8; X9; + X10; X11; X12; X13; X14] ,, + MAYCHANGE SOME_FLAGS ,, MAYCHANGE [events] ,, + MAYCHANGE [memory :> bignum(z,4)])`, + X_GEN_TAC `z:int64` THEN W64_GEN_TAC `k:num` THEN + MAP_EVERY X_GEN_TAC [`x:int64`; `n:num`; `pc:num`] THEN + REWRITE_TAC[NONOVERLAPPING_CLAUSES] THEN + REWRITE_TAC[C_ARGUMENTS; C_RETURN; SOME_FLAGS] THEN DISCH_TAC THEN + BIGNUM_TERMRANGE_TAC `k:num` `n:num` THEN + + (*** Case split over the k < 4 case, which is a different path ***) + + ASM_CASES_TAC `k < 4` THENL + [SUBGOAL_THEN `n MOD n_256k1 = n` SUBST1_TAC THENL + [MATCH_MP_TAC MOD_LT THEN + TRANS_TAC LTE_TRANS `2 EXP (64 * k)` THEN ASM_REWRITE_TAC[] THEN + TRANS_TAC LE_TRANS `2 EXP (64 * 3)` THEN + ASM_REWRITE_TAC[LE_EXP; n_256k1] THEN CONV_TAC NUM_REDUCE_CONV THEN + ASM_ARITH_TAC; + ALL_TAC] THEN + REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN ENSURES_INIT_TAC "s0" THEN + BIGNUM_DIGITIZE_TAC "n_" `read(memory :> bytes(x,8 * 4)) s0` THEN + FIRST_X_ASSUM(MP_TAC o MATCH_MP (ARITH_RULE + `k < 4 ==> k = 0 \/ k = 1 \/ k = 2 \/ k = 3`)) THEN + DISCH_THEN(REPEAT_TCL DISJ_CASES_THEN SUBST_ALL_TAC) THEN + EXPAND_TAC "n" THEN CONV_TAC(ONCE_DEPTH_CONV BIGNUM_EXPAND_CONV) THEN + ASM_REWRITE_TAC[] THENL + [ARM_STEPS_TAC BIGNUM_MOD_N256K1_EXEC (1--9); + ARM_STEPS_TAC BIGNUM_MOD_N256K1_EXEC (1--12); + ARM_STEPS_TAC BIGNUM_MOD_N256K1_EXEC (1--15); + ARM_STEPS_TAC BIGNUM_MOD_N256K1_EXEC (1--17)] THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[VAL_WORD_0] THEN + ARITH_TAC; + FIRST_ASSUM(ASSUME_TAC o GEN_REWRITE_RULE I [NOT_LT])] THEN + + (*** Initial 4-digit modulus ***) + + ENSURES_SEQUENCE_TAC `pc + 0x60` + `\s. bignum_from_memory(x,k) s = n /\ + read X0 s = z /\ + read X2 s = x /\ + read X1 s = word(k - 4) /\ + read X12 s = word 0x402DA1732FC9BEBF /\ + read X13 s = word 0x4551231950B75FC4 /\ + bignum_of_wordlist[read X3 s; read X4 s; read X5 s; read X6 s] = + (highdigits n (k - 4)) MOD n_256k1` THEN + CONJ_TAC THENL + [ABBREV_TAC `j = k - 4` THEN VAL_INT64_TAC `j:num` THEN + SUBGOAL_THEN `word_sub (word k) (word 4):int64 = word j` ASSUME_TAC THENL + [SUBST1_TAC(SYM(ASSUME `k - 4 = j`)) THEN + REWRITE_TAC[WORD_SUB] THEN ASM_REWRITE_TAC[]; + ALL_TAC] THEN + REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN ENSURES_INIT_TAC "s0" THEN + EXPAND_TAC "n" THEN REWRITE_TAC[highdigits] THEN + REWRITE_TAC[GSYM BIGNUM_FROM_MEMORY_BYTES; BIGNUM_FROM_MEMORY_DIV] THEN + ASM_REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN + SUBST1_TAC(SYM(ASSUME `k - 4 = j`)) THEN + ASM_SIMP_TAC[ARITH_RULE `4 <= k ==> k - (k - 4) = 4`] THEN + ABBREV_TAC `m = bignum_from_memory(word_add x (word (8 * j)),4) s0` THEN + SUBGOAL_THEN `m < 2 EXP (64 * 4)` ASSUME_TAC THENL + [EXPAND_TAC "m" THEN REWRITE_TAC[BIGNUM_FROM_MEMORY_BOUND]; ALL_TAC] THEN + RULE_ASSUM_TAC(CONV_RULE(ONCE_DEPTH_CONV BIGNUM_EXPAND_CONV)) THEN + BIGNUM_DIGITIZE_TAC "m_" + `read (memory :> bytes (word_add x (word(8 * j)),8 * 4)) s0` THEN + ARM_STEPS_TAC BIGNUM_MOD_N256K1_EXEC (1--5) THEN + RULE_ASSUM_TAC(REWRITE_RULE[WORD_RULE + `word_add (word_shl (word j) 3) x = word_add x (word(8 * j))`]) THEN + ARM_ACCSTEPS_TAC BIGNUM_MOD_N256K1_EXEC [16;17;19;20] (6--20) THEN + SUBGOAL_THEN `carry_s20 <=> n_256k1 <= m` SUBST_ALL_TAC THENL + [MATCH_MP_TAC FLAG_FROM_CARRY_LE THEN EXISTS_TAC `256` THEN + EXPAND_TAC "m" THEN REWRITE_TAC[n_256k1; GSYM REAL_OF_NUM_CLAUSES] THEN + ACCUMULATOR_ASSUM_LIST(MP_TAC o end_itlist CONJ o DECARRY_RULE) THEN + DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN BOUNDER_TAC[]; + ALL_TAC] THEN + ARM_STEPS_TAC BIGNUM_MOD_N256K1_EXEC (21--24) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + REWRITE_TAC[bignum_of_wordlist; ADD_CLAUSES; MULT_CLAUSES] THEN + ASM_REWRITE_TAC[] THEN DISCARD_STATE_TAC "s24" THEN + W(MP_TAC o PART_MATCH (lhand o rand) MOD_CASES o rand o snd) THEN + ANTS_TAC THENL + [TRANS_TAC LTE_TRANS `2 EXP (64 * 4)` THEN + ASM_REWRITE_TAC[n_256k1] THEN CONV_TAC NUM_REDUCE_CONV; + DISCH_THEN SUBST1_TAC] THEN + COND_CASES_TAC THEN ASM_REWRITE_TAC[] THEN EXPAND_TAC "m" THENL + [ARITH_TAC; ALL_TAC] THEN + REWRITE_TAC[GSYM REAL_OF_NUM_CLAUSES] THEN + MATCH_MP_TAC EQUAL_FROM_CONGRUENT_REAL THEN + MAP_EVERY EXISTS_TAC [`256`; `&0:real`] THEN ASM_REWRITE_TAC[] THEN + CONJ_TAC THENL [BOUNDER_TAC[]; ALL_TAC] THEN CONJ_TAC THENL + [REWRITE_TAC[REAL_OF_NUM_CLAUSES] THEN + UNDISCH_TAC `m < 2 EXP (64 * 4)` THEN REWRITE_TAC[n_256k1] THEN + ARITH_TAC; + ALL_TAC] THEN + CONJ_TAC THENL [REAL_INTEGER_TAC; ALL_TAC] THEN + RULE_ASSUM_TAC(REWRITE_RULE[NOT_LT]) THEN + ASM_SIMP_TAC[GSYM REAL_OF_NUM_SUB] THEN + EXPAND_TAC "m" THEN REWRITE_TAC[GSYM REAL_OF_NUM_CLAUSES] THEN + ACCUMULATOR_POP_ASSUM_LIST(MP_TAC o end_itlist CONJ o DESUM_RULE) THEN + ASM_REWRITE_TAC[BITVAL_CLAUSES; n_256k1] THEN + DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN REAL_INTEGER_TAC; + ALL_TAC] THEN + + (*** Finish off the k = 4 case which is now just the writeback ***) + + FIRST_ASSUM(DISJ_CASES_THEN2 SUBST_ALL_TAC ASSUME_TAC o MATCH_MP (ARITH_RULE + `4 <= k ==> k = 4 \/ 4 < k`)) + THENL + [GHOST_INTRO_TAC `d0:int64` `read X3` THEN + GHOST_INTRO_TAC `d1:int64` `read X4` THEN + GHOST_INTRO_TAC `d2:int64` `read X5` THEN + GHOST_INTRO_TAC `d3:int64` `read X6` THEN + REWRITE_TAC[SUB_REFL; HIGHDIGITS_0] THEN + ENSURES_INIT_TAC "s0" THEN ARM_STEPS_TAC BIGNUM_MOD_N256K1_EXEC (1--3) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + FIRST_X_ASSUM(fun th -> GEN_REWRITE_TAC RAND_CONV [SYM th]) THEN + REWRITE_TAC[bignum_of_wordlist] THEN + CONV_TAC(LAND_CONV BIGNUM_EXPAND_CONV) THEN + ASM_REWRITE_TAC[] THEN ARITH_TAC; + ALL_TAC] THEN + + SUBGOAL_THEN `0 < k - 4 /\ ~(k - 4 = 0)` STRIP_ASSUME_TAC THENL + [SIMPLE_ARITH_TAC; ALL_TAC] THEN + + (*** Setup of loop invariant ***) + + ENSURES_WHILE_DOWN_TAC `k - 4` `pc + 0x64` `pc + 0xc8` + `\i s. bignum_from_memory(x,k) s = n /\ + read X0 s = z /\ + read X2 s = x /\ + read X1 s = word i /\ + read X12 s = word 0x402DA1732FC9BEBF /\ + read X13 s = word 0x4551231950B75FC4 /\ + bignum_of_wordlist[read X3 s; read X4 s; read X5 s; read X6 s] = + (highdigits n i) MOD n_256k1` THEN + ASM_REWRITE_TAC[] THEN REPEAT CONJ_TAC THENL + [VAL_INT64_TAC `k - 4` THEN REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN + ENSURES_INIT_TAC "s0" THEN ARM_STEPS_TAC BIGNUM_MOD_N256K1_EXEC [1] THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[]; + ALL_TAC; (*** Main loop invariant ***) + X_GEN_TAC `i:num` THEN STRIP_TAC THEN VAL_INT64_TAC `i:num` THEN + ASM_REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN + ENSURES_INIT_TAC "s0" THEN ARM_STEPS_TAC BIGNUM_MOD_N256K1_EXEC [1] THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[]; + GHOST_INTRO_TAC `d0:int64` `read X3` THEN + GHOST_INTRO_TAC `d1:int64` `read X4` THEN + GHOST_INTRO_TAC `d2:int64` `read X5` THEN + GHOST_INTRO_TAC `d3:int64` `read X6` THEN + REWRITE_TAC[SUB_REFL; HIGHDIGITS_0] THEN + ENSURES_INIT_TAC "s0" THEN ARM_STEPS_TAC BIGNUM_MOD_N256K1_EXEC (1--3) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + FIRST_X_ASSUM(fun th -> GEN_REWRITE_TAC RAND_CONV [SYM th]) THEN + REWRITE_TAC[bignum_of_wordlist] THEN + CONV_TAC(LAND_CONV BIGNUM_EXPAND_CONV) THEN + ASM_REWRITE_TAC[] THEN ARITH_TAC] THEN + + (*** Mathematics of main loop with decomposition and quotient estimate ***) + + X_GEN_TAC `i:num` THEN STRIP_TAC THEN VAL_INT64_TAC `i:num` THEN + GHOST_INTRO_TAC `m1:int64` `read X3` THEN + GHOST_INTRO_TAC `m2:int64` `read X4` THEN + GHOST_INTRO_TAC `m3:int64` `read X5` THEN + GHOST_INTRO_TAC `m4:int64` `read X6` THEN + GLOBALIZE_PRECONDITION_TAC THEN ASM_REWRITE_TAC[] THEN + ABBREV_TAC `m0:int64 = word(bigdigit n i)` THEN + ABBREV_TAC `m = bignum_of_wordlist[m0; m1; m2; m3; m4]` THEN + SUBGOAL_THEN `m < 2 EXP 64 * n_256k1` ASSUME_TAC THENL + [EXPAND_TAC "m" THEN ONCE_REWRITE_TAC[bignum_of_wordlist] THEN + MP_TAC(SPEC `m0:int64` VAL_BOUND_64) THEN + ASM_REWRITE_TAC[n_256k1] THEN ARITH_TAC; + ALL_TAC] THEN + SUBGOAL_THEN `highdigits n i MOD n_256k1 = m MOD n_256k1` SUBST1_TAC THENL + [ONCE_REWRITE_TAC[HIGHDIGITS_STEP] THEN + EXPAND_TAC "m" THEN ONCE_REWRITE_TAC[bignum_of_wordlist] THEN + EXPAND_TAC "m0" THEN + SIMP_TAC[VAL_WORD_EQ; DIMINDEX_64; BIGDIGIT_BOUND] THEN + ASM_REWRITE_TAC[] THEN CONV_TAC MOD_DOWN_CONV THEN + AP_THM_TAC THEN AP_TERM_TAC THEN ARITH_TAC; + ALL_TAC] THEN + MP_TAC(SPEC `m:num` n256k1longredlemma) THEN ASM_REWRITE_TAC[] THEN + LET_TAC THEN STRIP_TAC THEN + + (*** The computation of the quotient estimate q ***) + + ASM_REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN ENSURES_INIT_TAC "s0" THEN + + ARM_ACCSTEPS_TAC BIGNUM_MOD_N256K1_EXEC [1] (1--2) THEN + + FIRST_X_ASSUM(MP_TAC o SPEC `word q:int64` o MATCH_MP (MESON[] + `!q. read X14 s = q' ==> q' = q ==> read X14 s = q`)) THEN + ANTS_TAC THENL + [EXPAND_TAC "q" THEN + EXPAND_TAC "m" THEN + CONV_TAC(ONCE_DEPTH_CONV BIGNUM_OF_WORDLIST_DIV_CONV) THEN + ASM_REWRITE_TAC[bignum_of_wordlist; ADD_CLAUSES; MULT_CLAUSES] THEN + ACCUMULATOR_POP_ASSUM_LIST(MP_TAC o end_itlist CONJ) THEN + REWRITE_TAC[REAL_OF_NUM_CLAUSES] THEN + BOOL_CASES_TAC `carry_s1:bool` THEN + ASM_REWRITE_TAC[BITVAL_CLAUSES; ADD_CLAUSES; MULT_CLAUSES] THEN + DISCH_TAC THENL + [SUBGOAL_THEN + `MIN (val(m4:int64) + 1) (2 EXP 64 - 1) = 2 EXP 64 - 1` + SUBST1_TAC THENL + [MP_TAC(SPEC `sum_s1:int64` VAL_BOUND_64) THEN ASM_ARITH_TAC; + CONV_TAC NUM_REDUCE_CONV THEN CONV_TAC WORD_REDUCE_CONV]; + SUBGOAL_THEN + `MIN (val(m4:int64) + 1) (2 EXP 64 - 1) = val m4 + 1` + SUBST1_TAC THENL + [MP_TAC(SPEC `sum_s1:int64` VAL_BOUND_64) THEN ASM_ARITH_TAC; + GEN_REWRITE_TAC LAND_CONV [GSYM WORD_VAL] THEN + AP_TERM_TAC THEN ASM_ARITH_TAC]]; + DISCH_TAC THEN VAL_INT64_TAC `q:num`] THEN + + (*** The next digit in the current state ***) + + SUBGOAL_THEN `i:num < k` ASSUME_TAC THENL [SIMPLE_ARITH_TAC; ALL_TAC] THEN + MP_TAC(SPECL [`k:num`; `x:int64`; `s2:armstate`; `i:num`] + BIGDIGIT_BIGNUM_FROM_MEMORY) THEN + ASM_REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN + DISCH_THEN(MP_TAC o AP_TERM `word:num->int64` o SYM) THEN + ASM_REWRITE_TAC[WORD_VAL] THEN DISCH_TAC THEN + + (*** A bit of fiddle to make the accumulation tactics work ***) + + ABBREV_TAC `w:int64 = word q` THEN + UNDISCH_THEN `val(w:int64) = q` (SUBST_ALL_TAC o SYM) THEN + ACCUMULATOR_POP_ASSUM_LIST(K ALL_TAC o end_itlist CONJ) THEN + + (*** Subtraction of q * n_256k1 ***) + + ARM_ACCSTEPS_TAC BIGNUM_MOD_N256K1_EXEC + [3;4;5;6;7;8;9;10;11] (3--12) THEN + RULE_ASSUM_TAC(REWRITE_RULE[WORD_RULE + `word_sub (word (i + 1)) (word 1):int64 = word i`]) THEN + + (*** Remove spurious accumulator entries created from rvalue expressions ***) + REPEAT(FIRST_X_ASSUM(fun th -> + if free_in `sum_s5:int64` (concl th) || + free_in `sum_s7:int64` (concl th) || + free_in `sum_s10:int64` (concl th) + then ALL_TAC else failwith "keep")) THEN + + ARM_ACCSTEPS_TAC BIGNUM_MOD_N256K1_EXEC [14;15;16;17;18] (13--18) THEN + + RULE_ASSUM_TAC(REWRITE_RULE[ADD_CLAUSES; VAL_WORD_BITVAL]) THEN + + SUBGOAL_THEN + `sum_s18:int64 = word_neg(word(bitval(m < val(w:int64) * n_256k1))) /\ + &(bignum_of_wordlist[sum_s14; sum_s15; sum_s16; sum_s17]) = + if m < val w * n_256k1 then &m - &(val w * n_256k1) + &2 pow 256 + else &m - &(val w * n_256k1)` + STRIP_ASSUME_TAC THENL + [MATCH_MP_TAC MASK_AND_VALUE_FROM_CARRY_LT THEN CONJ_TAC THENL + [MAP_EVERY UNDISCH_TAC + [`val(w:int64) * n_256k1 <= m + n_256k1`; + `m < val(w:int64) * n_256k1 + n_256k1`] THEN + REWRITE_TAC[n_256k1; GSYM REAL_OF_NUM_CLAUSES] THEN REAL_ARITH_TAC; + ASM_REWRITE_TAC[bignum_of_wordlist; GSYM REAL_OF_NUM_CLAUSES]] THEN + CONJ_TAC THENL [BOUNDER_TAC[]; CONV_TAC(ONCE_DEPTH_CONV NUM_ADD_CONV)] THEN + ASM_REWRITE_TAC[] THEN EXPAND_TAC "m" THEN + REWRITE_TAC[bignum_of_wordlist; GSYM REAL_OF_NUM_CLAUSES; n_256k1] THEN + ACCUMULATOR_ASSUM_LIST(MP_TAC o end_itlist CONJ o DESUM_RULE) THEN + DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN + CONV_TAC(RAND_CONV REAL_POLY_CONV) THEN + REPEAT(MATCH_MP_TAC INTEGER_ADD THEN CONJ_TAC) THEN + TRY REAL_INTEGER_TAC THEN + ACCUMULATOR_POP_ASSUM_LIST(MP_TAC o end_itlist CONJ o DECARRY_RULE) THEN + DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN REAL_INTEGER_TAC; + ACCUMULATOR_POP_ASSUM_LIST(K ALL_TAC)] THEN + + (*** Final correction ***) + + ARM_ACCSTEPS_TAC BIGNUM_MOD_N256K1_EXEC [20;22;24;25] (19--25) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + CONV_TAC SYM_CONV THEN MATCH_MP_TAC MOD_UNIQ_BALANCED_REAL THEN + MAP_EVERY EXISTS_TAC [`val(w:int64)`; `256`] THEN + ASM_REWRITE_TAC[] THEN + ABBREV_TAC `b <=> m < val(w:int64) * n_256k1` THEN + REWRITE_TAC[REAL_ARITH + `m - s - (w - b) * n:real = (m - w * n) + (b * n - s)`] THEN + FIRST_ASSUM(MP_TAC o MATCH_MP (MESON[REAL_ADD_RID] + `x = (if p then y + z else y) ==> x = y + (if p then z else &0)`)) THEN + REWRITE_TAC[GSYM REAL_OF_NUM_CLAUSES; bignum_of_wordlist; n_256k1] THEN + GEN_REWRITE_TAC LAND_CONV [REAL_ARITH `x:real = y + z <=> y = x - z`] THEN + DISCH_THEN SUBST1_TAC THEN + CONJ_TAC THENL [ARITH_TAC; ALL_TAC] THEN + REWRITE_TAC[bignum_of_wordlist; GSYM REAL_OF_NUM_CLAUSES] THEN + CONJ_TAC THENL [BOUNDER_TAC[]; ALL_TAC] THEN + ACCUMULATOR_ASSUM_LIST(MP_TAC o end_itlist CONJ o DESUM_RULE) THEN + DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN POP_ASSUM_LIST(K ALL_TAC) THEN + COND_CASES_TAC THEN ASM_REWRITE_TAC[BITVAL_CLAUSES] THEN + CONV_TAC WORD_REDUCE_CONV THEN REAL_INTEGER_TAC);; + +let BIGNUM_MOD_N256K1_SUBROUTINE_CORRECT = time prove + (`!z k x n pc returnaddress. + nonoverlapping (word pc,0x10c) (z,32) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_mod_n256k1_mc /\ + read PC s = word pc /\ + read X30 s = returnaddress /\ + C_ARGUMENTS [z; k; x] s /\ + bignum_from_memory (x,val k) s = n) + (\s. read PC s = returnaddress /\ + bignum_from_memory (z,4) s = n MOD n_256k1) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bignum(z,4)])`, + ARM_ADD_RETURN_NOSTACK_TAC BIGNUM_MOD_N256K1_EXEC + BIGNUM_MOD_N256K1_CORRECT);; diff --git a/arm/proofs/bignum_mod_p256k1.ml b/arm/proofs/bignum_mod_p256k1.ml new file mode 100644 index 000000000..4d17d01ed --- /dev/null +++ b/arm/proofs/bignum_mod_p256k1.ml @@ -0,0 +1,391 @@ +(* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +(* ========================================================================= *) +(* Reduction modulo p_256k1, the field characteristic for secp256k1. *) +(* ========================================================================= *) + +needs "arm/proofs/base.ml";; + +(**** print_literal_from_elf "arm/secp256k1/bignum_mod_p256k1.o";; + ****) + +let bignum_mod_p256k1_mc = + define_assert_from_elf "bignum_mod_p256k1_mc" "arm/secp256k1/bignum_mod_p256k1.o" +[ + 0xf100103f; (* arm_CMP X1 (rvalue (word 4)) *) + 0x540004c3; (* arm_BCC (word 152) *) + 0xd1001021; (* arm_SUB X1 X1 (rvalue (word 4)) *) + 0xd37df027; (* arm_LSL X7 X1 (rvalue (word 3)) *) + 0x8b0200e7; (* arm_ADD X7 X7 X2 *) + 0xa94118e5; (* arm_LDP X5 X6 X7 (Immediate_Offset (iword (&16))) *) + 0xa94010e3; (* arm_LDP X3 X4 X7 (Immediate_Offset (iword (&0))) *) + 0xd2807a2c; (* arm_MOV X12 (rvalue (word 977)) *) + 0xb260018c; (* arm_ORR X12 X12 (rvalue (word 4294967296)) *) + 0xab0c0067; (* arm_ADDS X7 X3 X12 *) + 0xba1f0088; (* arm_ADCS X8 X4 XZR *) + 0xba1f00a9; (* arm_ADCS X9 X5 XZR *) + 0xba1f00ca; (* arm_ADCS X10 X6 XZR *) + 0x9a873063; (* arm_CSEL X3 X3 X7 Condition_CC *) + 0x9a883084; (* arm_CSEL X4 X4 X8 Condition_CC *) + 0x9a8930a5; (* arm_CSEL X5 X5 X9 Condition_CC *) + 0x9a8a30c6; (* arm_CSEL X6 X6 X10 Condition_CC *) + 0xb4000261; (* arm_CBZ X1 (word 76) *) + 0xb10004cd; (* arm_ADDS X13 X6 (rvalue (word 1)) *) + 0xda9f31ad; (* arm_CSINV X13 X13 XZR Condition_CC *) + 0x9b0c7da7; (* arm_MUL X7 X13 X12 *) + 0x9bcc7da8; (* arm_UMULH X8 X13 X12 *) + 0xcb0d00c6; (* arm_SUB X6 X6 X13 *) + 0xd1000421; (* arm_SUB X1 X1 (rvalue (word 1)) *) + 0xf861784d; (* arm_LDR X13 X2 (Shiftreg_Offset X1 3) *) + 0xab0701a7; (* arm_ADDS X7 X13 X7 *) + 0xba080068; (* arm_ADCS X8 X3 X8 *) + 0xba1f0089; (* arm_ADCS X9 X4 XZR *) + 0xba1f00aa; (* arm_ADCS X10 X5 XZR *) + 0x9a1f00cb; (* arm_ADC X11 X6 XZR *) + 0x8a0c016d; (* arm_AND X13 X11 X12 *) + 0xeb0d00e3; (* arm_SUBS X3 X7 X13 *) + 0xfa1f0104; (* arm_SBCS X4 X8 XZR *) + 0xfa1f0125; (* arm_SBCS X5 X9 XZR *) + 0xda1f0146; (* arm_SBC X6 X10 XZR *) + 0xb5fffde1; (* arm_CBNZ X1 (word 2097084) *) + 0xa9001003; (* arm_STP X3 X4 X0 (Immediate_Offset (iword (&0))) *) + 0xa9011805; (* arm_STP X5 X6 X0 (Immediate_Offset (iword (&16))) *) + 0xd65f03c0; (* arm_RET X30 *) + 0xaa1f03e3; (* arm_MOV X3 XZR *) + 0xaa1f03e4; (* arm_MOV X4 XZR *) + 0xaa1f03e5; (* arm_MOV X5 XZR *) + 0xaa1f03e6; (* arm_MOV X6 XZR *) + 0xb4ffff21; (* arm_CBZ X1 (word 2097124) *) + 0xf9400043; (* arm_LDR X3 X2 (Immediate_Offset (word 0)) *) + 0xf1000421; (* arm_SUBS X1 X1 (rvalue (word 1)) *) + 0x54fffec0; (* arm_BEQ (word 2097112) *) + 0xf9400444; (* arm_LDR X4 X2 (Immediate_Offset (word 8)) *) + 0xf1000421; (* arm_SUBS X1 X1 (rvalue (word 1)) *) + 0x54fffe60; (* arm_BEQ (word 2097100) *) + 0xf9400845; (* arm_LDR X5 X2 (Immediate_Offset (word 16)) *) + 0x17fffff1 (* arm_B (word 268435396) *) +];; + +let BIGNUM_MOD_P256K1_EXEC = ARM_MK_EXEC_RULE bignum_mod_p256k1_mc;; + +(* ------------------------------------------------------------------------- *) +(* Proof. *) +(* ------------------------------------------------------------------------- *) + +let p_256k1 = new_definition `p_256k1 = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F`;; + +let p256k1longredlemma = prove + (`!n. n < 2 EXP 64 * p_256k1 + ==> let q = MIN (n DIV 2 EXP 256 + 1) (2 EXP 64 - 1) in + q < 2 EXP 64 /\ + q * p_256k1 <= n + p_256k1 /\ + n < q * p_256k1 + p_256k1`, + CONV_TAC(TOP_DEPTH_CONV let_CONV) THEN REWRITE_TAC[p_256k1] THEN ARITH_TAC);; + +let BIGNUM_MOD_P256K1_CORRECT = time prove + (`!z k x n pc. + nonoverlapping (word pc,0xd0) (z,32) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_mod_p256k1_mc /\ + read PC s = word pc /\ + C_ARGUMENTS [z; k; x] s /\ + bignum_from_memory (x,val k) s = n) + (\s. read PC s = word (pc + 0x98) /\ + bignum_from_memory (z,4) s = n MOD p_256k1) + (MAYCHANGE [PC; X1; X3; X4; X5; X6; X7; X8; X9; + X10; X11; X12; X13] ,, + MAYCHANGE SOME_FLAGS ,, MAYCHANGE [events] ,, + MAYCHANGE [memory :> bignum(z,4)])`, + X_GEN_TAC `z:int64` THEN W64_GEN_TAC `k:num` THEN + MAP_EVERY X_GEN_TAC [`x:int64`; `n:num`; `pc:num`] THEN + REWRITE_TAC[NONOVERLAPPING_CLAUSES] THEN + REWRITE_TAC[C_ARGUMENTS; C_RETURN; SOME_FLAGS] THEN DISCH_TAC THEN + BIGNUM_TERMRANGE_TAC `k:num` `n:num` THEN + + (*** Case split over the k < 4 case, which is a different path ***) + + ASM_CASES_TAC `k < 4` THENL + [SUBGOAL_THEN `n MOD p_256k1 = n` SUBST1_TAC THENL + [MATCH_MP_TAC MOD_LT THEN + TRANS_TAC LTE_TRANS `2 EXP (64 * k)` THEN ASM_REWRITE_TAC[] THEN + TRANS_TAC LE_TRANS `2 EXP (64 * 3)` THEN + ASM_REWRITE_TAC[LE_EXP; p_256k1] THEN CONV_TAC NUM_REDUCE_CONV THEN + ASM_ARITH_TAC; + ALL_TAC] THEN + REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN ENSURES_INIT_TAC "s0" THEN + BIGNUM_DIGITIZE_TAC "n_" `read(memory :> bytes(x,8 * 4)) s0` THEN + FIRST_X_ASSUM(MP_TAC o MATCH_MP (ARITH_RULE + `k < 4 ==> k = 0 \/ k = 1 \/ k = 2 \/ k = 3`)) THEN + DISCH_THEN(REPEAT_TCL DISJ_CASES_THEN SUBST_ALL_TAC) THEN + EXPAND_TAC "n" THEN CONV_TAC(ONCE_DEPTH_CONV BIGNUM_EXPAND_CONV) THEN + ASM_REWRITE_TAC[] THENL + [ARM_STEPS_TAC BIGNUM_MOD_P256K1_EXEC (1--9); + ARM_STEPS_TAC BIGNUM_MOD_P256K1_EXEC (1--12); + ARM_STEPS_TAC BIGNUM_MOD_P256K1_EXEC (1--15); + ARM_STEPS_TAC BIGNUM_MOD_P256K1_EXEC (1--17)] THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[VAL_WORD_0] THEN + ARITH_TAC; + FIRST_ASSUM(ASSUME_TAC o GEN_REWRITE_RULE I [NOT_LT])] THEN + + (*** Initial 4-digit modulus ***) + + ENSURES_SEQUENCE_TAC `pc + 0x44` + `\s. bignum_from_memory(x,k) s = n /\ + read X0 s = z /\ + read X2 s = x /\ + read X1 s = word(k - 4) /\ + read X12 s = word 0x1000003D1 /\ + bignum_of_wordlist[read X3 s; read X4 s; read X5 s; read X6 s] = + (highdigits n (k - 4)) MOD p_256k1` THEN + CONJ_TAC THENL + [ABBREV_TAC `j = k - 4` THEN VAL_INT64_TAC `j:num` THEN + SUBGOAL_THEN `word_sub (word k) (word 4):int64 = word j` ASSUME_TAC THENL + [SUBST1_TAC(SYM(ASSUME `k - 4 = j`)) THEN + REWRITE_TAC[WORD_SUB] THEN ASM_REWRITE_TAC[]; + ALL_TAC] THEN + REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN ENSURES_INIT_TAC "s0" THEN + EXPAND_TAC "n" THEN REWRITE_TAC[highdigits] THEN + REWRITE_TAC[GSYM BIGNUM_FROM_MEMORY_BYTES; BIGNUM_FROM_MEMORY_DIV] THEN + ASM_REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN + SUBST1_TAC(SYM(ASSUME `k - 4 = j`)) THEN + ASM_SIMP_TAC[ARITH_RULE `4 <= k ==> k - (k - 4) = 4`] THEN + ABBREV_TAC `m = bignum_from_memory(word_add x (word (8 * j)),4) s0` THEN + SUBGOAL_THEN `m < 2 EXP (64 * 4)` ASSUME_TAC THENL + [EXPAND_TAC "m" THEN REWRITE_TAC[BIGNUM_FROM_MEMORY_BOUND]; ALL_TAC] THEN + RULE_ASSUM_TAC(CONV_RULE(ONCE_DEPTH_CONV BIGNUM_EXPAND_CONV)) THEN + BIGNUM_DIGITIZE_TAC "m_" + `read (memory :> bytes (word_add x (word(8 * j)),8 * 4)) s0` THEN + ARM_STEPS_TAC BIGNUM_MOD_P256K1_EXEC (1--5) THEN + RULE_ASSUM_TAC(REWRITE_RULE[WORD_RULE + `word_add (word_shl (word j) 3) x = word_add x (word(8 * j))`]) THEN + ARM_ACCSTEPS_TAC BIGNUM_MOD_P256K1_EXEC (10--13) (6--13) THEN + SUBGOAL_THEN `carry_s13 <=> p_256k1 <= m` SUBST_ALL_TAC THENL + [MATCH_MP_TAC FLAG_FROM_CARRY_LE THEN EXISTS_TAC `256` THEN + EXPAND_TAC "m" THEN REWRITE_TAC[p_256k1; GSYM REAL_OF_NUM_CLAUSES] THEN + ACCUMULATOR_ASSUM_LIST(MP_TAC o end_itlist CONJ o DECARRY_RULE) THEN + DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN BOUNDER_TAC[]; + ALL_TAC] THEN + ARM_STEPS_TAC BIGNUM_MOD_P256K1_EXEC (14--17) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + REWRITE_TAC[bignum_of_wordlist; ADD_CLAUSES; MULT_CLAUSES] THEN + ASM_REWRITE_TAC[] THEN DISCARD_STATE_TAC "s17" THEN + W(MP_TAC o PART_MATCH (lhand o rand) MOD_CASES o rand o snd) THEN + ANTS_TAC THENL + [TRANS_TAC LTE_TRANS `2 EXP (64 * 4)` THEN + ASM_REWRITE_TAC[p_256k1] THEN CONV_TAC NUM_REDUCE_CONV; + DISCH_THEN SUBST1_TAC] THEN + COND_CASES_TAC THEN ASM_REWRITE_TAC[] THEN EXPAND_TAC "m" THENL + [ARITH_TAC; ALL_TAC] THEN + REWRITE_TAC[GSYM REAL_OF_NUM_CLAUSES] THEN + MATCH_MP_TAC EQUAL_FROM_CONGRUENT_REAL THEN + MAP_EVERY EXISTS_TAC [`256`; `&0:real`] THEN ASM_REWRITE_TAC[] THEN + CONJ_TAC THENL [BOUNDER_TAC[]; ALL_TAC] THEN CONJ_TAC THENL + [REWRITE_TAC[REAL_OF_NUM_CLAUSES] THEN + UNDISCH_TAC `m < 2 EXP (64 * 4)` THEN REWRITE_TAC[p_256k1] THEN + ARITH_TAC; + ALL_TAC] THEN + CONJ_TAC THENL [REAL_INTEGER_TAC; ALL_TAC] THEN + RULE_ASSUM_TAC(REWRITE_RULE[NOT_LT]) THEN + ASM_SIMP_TAC[GSYM REAL_OF_NUM_SUB] THEN + EXPAND_TAC "m" THEN REWRITE_TAC[GSYM REAL_OF_NUM_CLAUSES] THEN + ACCUMULATOR_POP_ASSUM_LIST(MP_TAC o end_itlist CONJ o DESUM_RULE) THEN + ASM_REWRITE_TAC[BITVAL_CLAUSES; p_256k1] THEN + DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN REAL_INTEGER_TAC; + ALL_TAC] THEN + + (*** Finish off the k = 4 case which is now just the writeback ***) + + FIRST_ASSUM(DISJ_CASES_THEN2 SUBST_ALL_TAC ASSUME_TAC o MATCH_MP (ARITH_RULE + `4 <= k ==> k = 4 \/ 4 < k`)) + THENL + [GHOST_INTRO_TAC `d0:int64` `read X3` THEN + GHOST_INTRO_TAC `d1:int64` `read X4` THEN + GHOST_INTRO_TAC `d2:int64` `read X5` THEN + GHOST_INTRO_TAC `d3:int64` `read X6` THEN + REWRITE_TAC[SUB_REFL; HIGHDIGITS_0] THEN + ENSURES_INIT_TAC "s0" THEN ARM_STEPS_TAC BIGNUM_MOD_P256K1_EXEC (1--3) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + FIRST_X_ASSUM(fun th -> GEN_REWRITE_TAC RAND_CONV [SYM th]) THEN + REWRITE_TAC[bignum_of_wordlist] THEN + CONV_TAC(LAND_CONV BIGNUM_EXPAND_CONV) THEN + ASM_REWRITE_TAC[] THEN ARITH_TAC; + ALL_TAC] THEN + + SUBGOAL_THEN `0 < k - 4 /\ ~(k - 4 = 0)` STRIP_ASSUME_TAC THENL + [SIMPLE_ARITH_TAC; ALL_TAC] THEN + + (*** Setup of loop invariant ***) + + ENSURES_WHILE_DOWN_TAC `k - 4` `pc + 0x48` `pc + 0x8c` + `\i s. bignum_from_memory(x,k) s = n /\ + read X0 s = z /\ + read X2 s = x /\ + read X1 s = word i /\ + read X12 s = word 0x1000003D1 /\ + bignum_of_wordlist[read X3 s; read X4 s; read X5 s; read X6 s] = + (highdigits n i) MOD p_256k1` THEN + ASM_REWRITE_TAC[] THEN REPEAT CONJ_TAC THENL + [VAL_INT64_TAC `k - 4` THEN REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN + ENSURES_INIT_TAC "s0" THEN ARM_STEPS_TAC BIGNUM_MOD_P256K1_EXEC [1] THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[]; + ALL_TAC; (*** Main loop invariant ***) + X_GEN_TAC `i:num` THEN STRIP_TAC THEN VAL_INT64_TAC `i:num` THEN + ASM_REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN + ENSURES_INIT_TAC "s0" THEN ARM_STEPS_TAC BIGNUM_MOD_P256K1_EXEC [1] THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[]; + GHOST_INTRO_TAC `d0:int64` `read X3` THEN + GHOST_INTRO_TAC `d1:int64` `read X4` THEN + GHOST_INTRO_TAC `d2:int64` `read X5` THEN + GHOST_INTRO_TAC `d3:int64` `read X6` THEN + REWRITE_TAC[SUB_REFL; HIGHDIGITS_0] THEN + ENSURES_INIT_TAC "s0" THEN ARM_STEPS_TAC BIGNUM_MOD_P256K1_EXEC (1--3) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + FIRST_X_ASSUM(fun th -> GEN_REWRITE_TAC RAND_CONV [SYM th]) THEN + REWRITE_TAC[bignum_of_wordlist] THEN + CONV_TAC(LAND_CONV BIGNUM_EXPAND_CONV) THEN + ASM_REWRITE_TAC[] THEN ARITH_TAC] THEN + + (*** Mathematics of main loop with decomposition and quotient estimate ***) + + X_GEN_TAC `i:num` THEN STRIP_TAC THEN VAL_INT64_TAC `i:num` THEN + GHOST_INTRO_TAC `m1:int64` `read X3` THEN + GHOST_INTRO_TAC `m2:int64` `read X4` THEN + GHOST_INTRO_TAC `m3:int64` `read X5` THEN + GHOST_INTRO_TAC `m4:int64` `read X6` THEN + GLOBALIZE_PRECONDITION_TAC THEN ASM_REWRITE_TAC[] THEN + ABBREV_TAC `m0:int64 = word(bigdigit n i)` THEN + ABBREV_TAC `m = bignum_of_wordlist[m0; m1; m2; m3; m4]` THEN + SUBGOAL_THEN `m < 2 EXP 64 * p_256k1` ASSUME_TAC THENL + [EXPAND_TAC "m" THEN ONCE_REWRITE_TAC[bignum_of_wordlist] THEN + MP_TAC(SPEC `m0:int64` VAL_BOUND_64) THEN + ASM_REWRITE_TAC[p_256k1] THEN ARITH_TAC; + ALL_TAC] THEN + SUBGOAL_THEN `highdigits n i MOD p_256k1 = m MOD p_256k1` SUBST1_TAC THENL + [ONCE_REWRITE_TAC[HIGHDIGITS_STEP] THEN + EXPAND_TAC "m" THEN ONCE_REWRITE_TAC[bignum_of_wordlist] THEN + EXPAND_TAC "m0" THEN + SIMP_TAC[VAL_WORD_EQ; DIMINDEX_64; BIGDIGIT_BOUND] THEN + ASM_REWRITE_TAC[] THEN CONV_TAC MOD_DOWN_CONV THEN + AP_THM_TAC THEN AP_TERM_TAC THEN ARITH_TAC; + ALL_TAC] THEN + MP_TAC(SPEC `m:num` p256k1longredlemma) THEN ASM_REWRITE_TAC[] THEN + LET_TAC THEN STRIP_TAC THEN + + (*** The computation of the quotient estimate q ***) + + ASM_REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN ENSURES_INIT_TAC "s0" THEN + + ARM_ACCSTEPS_TAC BIGNUM_MOD_P256K1_EXEC [1] (1--2) THEN + + FIRST_X_ASSUM(MP_TAC o SPEC `word q:int64` o MATCH_MP (MESON[] + `!q. read X13 s = q' ==> q' = q ==> read X13 s = q`)) THEN + ANTS_TAC THENL + [EXPAND_TAC "q" THEN + EXPAND_TAC "m" THEN + CONV_TAC(ONCE_DEPTH_CONV BIGNUM_OF_WORDLIST_DIV_CONV) THEN + ASM_REWRITE_TAC[bignum_of_wordlist; ADD_CLAUSES; MULT_CLAUSES] THEN + ACCUMULATOR_POP_ASSUM_LIST(MP_TAC o end_itlist CONJ) THEN + REWRITE_TAC[REAL_OF_NUM_CLAUSES] THEN + BOOL_CASES_TAC `carry_s1:bool` THEN + ASM_REWRITE_TAC[BITVAL_CLAUSES; ADD_CLAUSES; MULT_CLAUSES] THEN + DISCH_TAC THENL + [SUBGOAL_THEN + `MIN (val(m4:int64) + 1) (2 EXP 64 - 1) = 2 EXP 64 - 1` + SUBST1_TAC THENL + [MP_TAC(SPEC `sum_s1:int64` VAL_BOUND_64) THEN ASM_ARITH_TAC; + CONV_TAC NUM_REDUCE_CONV THEN CONV_TAC WORD_REDUCE_CONV]; + SUBGOAL_THEN + `MIN (val(m4:int64) + 1) (2 EXP 64 - 1) = val m4 + 1` + SUBST1_TAC THENL + [MP_TAC(SPEC `sum_s1:int64` VAL_BOUND_64) THEN ASM_ARITH_TAC; + GEN_REWRITE_TAC LAND_CONV [GSYM WORD_VAL] THEN + AP_TERM_TAC THEN ASM_ARITH_TAC]]; + DISCH_TAC THEN VAL_INT64_TAC `q:num`] THEN + + (*** The next digit in the current state ***) + + SUBGOAL_THEN `i:num < k` ASSUME_TAC THENL [SIMPLE_ARITH_TAC; ALL_TAC] THEN + MP_TAC(SPECL [`k:num`; `x:int64`; `s2:armstate`; `i:num`] + BIGDIGIT_BIGNUM_FROM_MEMORY) THEN + ASM_REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN + DISCH_THEN(MP_TAC o AP_TERM `word:num->int64` o SYM) THEN + ASM_REWRITE_TAC[WORD_VAL] THEN DISCH_TAC THEN + + (*** A bit of fiddle to make the accumulation tactics work ***) + + ABBREV_TAC `w:int64 = word q` THEN + UNDISCH_THEN `val(w:int64) = q` (SUBST_ALL_TAC o SYM) THEN + ACCUMULATOR_POP_ASSUM_LIST(K ALL_TAC o end_itlist CONJ) THEN + + (*** Subtraction of q * p_256k1 ***) + + ARM_ACCSTEPS_TAC BIGNUM_MOD_P256K1_EXEC [3;5] (3--6) THEN + RULE_ASSUM_TAC(REWRITE_RULE[WORD_RULE + `word_sub (word (i + 1)) (word 1):int64 = word i`]) THEN + ARM_ACCSTEPS_TAC BIGNUM_MOD_P256K1_EXEC [8;9;10;11;12] (7--12) THEN + + SUBGOAL_THEN + `sum_s12:int64 = word_neg(word(bitval(m < val(w:int64) * p_256k1))) /\ + &(bignum_of_wordlist[sum_s8; sum_s9; sum_s10; sum_s11]) = + if m < val w * p_256k1 then &m - &(val w * p_256k1) + &2 pow 256 + else &m - &(val w * p_256k1)` + STRIP_ASSUME_TAC THENL + [MATCH_MP_TAC MASK_AND_VALUE_FROM_CARRY_LT THEN CONJ_TAC THENL + [MAP_EVERY UNDISCH_TAC + [`val(w:int64) * p_256k1 <= m + p_256k1`; + `m < val(w:int64) * p_256k1 + p_256k1`] THEN + REWRITE_TAC[p_256k1; GSYM REAL_OF_NUM_CLAUSES] THEN REAL_ARITH_TAC; + ASM_REWRITE_TAC[bignum_of_wordlist; GSYM REAL_OF_NUM_CLAUSES]] THEN + CONJ_TAC THENL [BOUNDER_TAC[]; CONV_TAC(ONCE_DEPTH_CONV NUM_ADD_CONV)] THEN + ASM_REWRITE_TAC[] THEN EXPAND_TAC "m" THEN + REWRITE_TAC[bignum_of_wordlist; GSYM REAL_OF_NUM_CLAUSES; p_256k1] THEN + ACCUMULATOR_ASSUM_LIST(MP_TAC o end_itlist CONJ o DESUM_RULE) THEN + DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN + CONV_TAC(RAND_CONV REAL_POLY_CONV) THEN + REPEAT(MATCH_MP_TAC INTEGER_ADD THEN CONJ_TAC) THEN + TRY REAL_INTEGER_TAC THEN + ACCUMULATOR_POP_ASSUM_LIST(MP_TAC o end_itlist CONJ o DECARRY_RULE) THEN + DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN REAL_INTEGER_TAC; + ACCUMULATOR_POP_ASSUM_LIST(K ALL_TAC)] THEN + + (*** Final correction ***) + + ARM_ACCSTEPS_TAC BIGNUM_MOD_P256K1_EXEC [14;15;16;17] (13--17) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + CONV_TAC SYM_CONV THEN MATCH_MP_TAC MOD_UNIQ_BALANCED_REAL THEN + MAP_EVERY EXISTS_TAC [`val(w:int64)`; `256`] THEN + ASM_REWRITE_TAC[] THEN + ABBREV_TAC `b <=> m < val(w:int64) * p_256k1` THEN + REWRITE_TAC[REAL_ARITH + `m - s - (w - b) * n:real = (m - w * n) + (b * n - s)`] THEN + FIRST_ASSUM(MP_TAC o MATCH_MP (MESON[REAL_ADD_RID] + `x = (if p then y + z else y) ==> x = y + (if p then z else &0)`)) THEN + REWRITE_TAC[GSYM REAL_OF_NUM_CLAUSES; bignum_of_wordlist; p_256k1] THEN + GEN_REWRITE_TAC LAND_CONV [REAL_ARITH `x:real = y + z <=> y = x - z`] THEN + DISCH_THEN SUBST1_TAC THEN + CONJ_TAC THENL [ARITH_TAC; ALL_TAC] THEN + REWRITE_TAC[bignum_of_wordlist; GSYM REAL_OF_NUM_CLAUSES] THEN + CONJ_TAC THENL [BOUNDER_TAC[]; ALL_TAC] THEN + ACCUMULATOR_ASSUM_LIST(MP_TAC o end_itlist CONJ o DESUM_RULE) THEN + DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN POP_ASSUM_LIST(K ALL_TAC) THEN + COND_CASES_TAC THEN ASM_REWRITE_TAC[BITVAL_CLAUSES] THEN + CONV_TAC WORD_REDUCE_CONV THEN REAL_INTEGER_TAC);; + +let BIGNUM_MOD_P256K1_SUBROUTINE_CORRECT = time prove + (`!z k x n pc returnaddress. + nonoverlapping (word pc,0xd0) (z,32) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) bignum_mod_p256k1_mc /\ + read PC s = word pc /\ + read X30 s = returnaddress /\ + C_ARGUMENTS [z; k; x] s /\ + bignum_from_memory (x,val k) s = n) + (\s. read PC s = returnaddress /\ + bignum_from_memory (z,4) s = n MOD p_256k1) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bignum(z,4)])`, + ARM_ADD_RETURN_NOSTACK_TAC BIGNUM_MOD_P256K1_EXEC + BIGNUM_MOD_P256K1_CORRECT);; diff --git a/arm/proofs/subroutine_signatures.ml b/arm/proofs/subroutine_signatures.ml index dfc6c3869..b380c1ab5 100644 --- a/arm/proofs/subroutine_signatures.ml +++ b/arm/proofs/subroutine_signatures.ml @@ -1698,6 +1698,23 @@ let subroutine_signatures = [ ]) ); +("bignum_mod_n256k1", + ([(*args*) + ("z", "uint64_t[static 4]", (*is const?*)"false"); + ("k", "uint64_t", (*is const?*)"false"); + ("x", "uint64_t*", (*is const?*)"true"); + ], + "void", + [(* input buffers *) + ("x", "k"(* num elems *), 8(* elem bytesize *)); + ], + [(* output buffers *) + ("z", "4"(* num elems *), 8(* elem bytesize *)); + ], + [(* temporary buffers *) + ]) +); + ("bignum_mod_n256k1_4", ([(*args*) ("z", "uint64_t[static 4]", (*is const?*)"false"); @@ -1845,6 +1862,23 @@ let subroutine_signatures = [ ]) ); +("bignum_mod_p256k1", + ([(*args*) + ("z", "uint64_t[static 4]", (*is const?*)"false"); + ("k", "uint64_t", (*is const?*)"false"); + ("x", "uint64_t*", (*is const?*)"true"); + ], + "void", + [(* input buffers *) + ("x", "k"(* num elems *), 8(* elem bytesize *)); + ], + [(* output buffers *) + ("z", "4"(* num elems *), 8(* elem bytesize *)); + ], + [(* temporary buffers *) + ]) +); + ("bignum_mod_p256k1_4", ([(*args*) ("z", "uint64_t[static 4]", (*is const?*)"false"); diff --git a/arm/secp256k1/Makefile b/arm/secp256k1/Makefile index 5ba07a7d7..fb321612a 100644 --- a/arm/secp256k1/Makefile +++ b/arm/secp256k1/Makefile @@ -27,7 +27,9 @@ OBJ = bignum_add_p256k1.o \ bignum_demont_p256k1.o \ bignum_double_p256k1.o \ bignum_half_p256k1.o \ + bignum_mod_n256k1.o \ bignum_mod_n256k1_4.o \ + bignum_mod_p256k1.o \ bignum_mod_p256k1_4.o \ bignum_montmul_p256k1.o \ bignum_montmul_p256k1_alt.o \ diff --git a/arm/secp256k1/bignum_mod_n256k1.S b/arm/secp256k1/bignum_mod_n256k1.S new file mode 100644 index 000000000..2703d1fb7 --- /dev/null +++ b/arm/secp256k1/bignum_mod_n256k1.S @@ -0,0 +1,172 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Reduce modulo group order, z := x mod n_256k1 +// Input x[k]; output z[4] +// +// extern void bignum_mod_n256k1(uint64_t z[static 4], uint64_t k, +// const uint64_t *x); +// +// Reduction is modulo the group order of the secp256k1 curve. +// +// Standard ARM ABI: X0 = z, X1 = k, X2 = x +// ---------------------------------------------------------------------------- + +#include "_internal_s2n_bignum_arm.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n256k1) + S2N_BN_FUNCTION_TYPE_DIRECTIVE(bignum_mod_n256k1) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n256k1) + .text + .balign 4 + +#define z x0 +#define k x1 +#define x x2 + +#define m0 x3 +#define m1 x4 +#define m2 x5 +#define m3 x6 + +#define t0 x7 +#define t1 x8 +#define t2 x9 +#define t3 x10 +#define t4 x11 + +#define n0 x12 +#define n1 x13 + +// These two are aliased: we only load d when finished with q + +#define q x14 +#define d x14 + +// Loading large constants + +#define movbig(nn,n3,n2,n1,n0) \ + movz nn, n0 __LF \ + movk nn, n1, lsl #16 __LF \ + movk nn, n2, lsl #32 __LF \ + movk nn, n3, lsl #48 + +S2N_BN_SYMBOL(bignum_mod_n256k1): + CFI_START + +// If the input is already <= 3 words long, go to a trivial "copy" path + + cmp k, #4 + bcc Lbignum_mod_n256k1_short + +// Otherwise load the top 4 digits (top-down) and reduce k by 4 + + sub k, k, #4 + lsl t0, k, #3 + add t0, t0, x + ldp m2, m3, [t0, #16] + ldp m0, m1, [t0] + +// Load the two complicated words of 2^256 - n_256k1 = [0; 1; n1; n0] + + movbig( n0, #0x402d, #0xa173, #0x2fc9, #0xbebf) + movbig( n1, #0x4551, #0x2319, #0x50b7, #0x5fc4) + +// Reduce the top 4 digits mod n_256k1 (a conditional subtraction of n_256k1) + + adds t0, m0, n0 + adcs t1, m1, n1 + mov t4, #1 + adcs t2, m2, t4 + adcs t3, m3, xzr + + csel m0, m0, t0, cc + csel m1, m1, t1, cc + csel m2, m2, t2, cc + csel m3, m3, t3, cc + +// Now do (k-4) iterations of 5->4 word modular reduction + + cbz k, Lbignum_mod_n256k1_writeback +Lbignum_mod_n256k1_loop: + +// Writing the input as z = 2^256 * m3 + rest, our quotient approximation is +// q = min(m3 + 1, 2^64 - 1). This is accurate because 2^256 - n_256k1 is +// only about 2^129, so the true quotient is m3 or m3 + 1. + + adds q, m3, #1 + csinv q, q, xzr, cc + +// [t3;t2;t1;t0] = q * (2^256 - n_256k1) = q * [0; 1; n1; n0] + + mul t0, n0, q + mul t1, n1, q + umulh t2, n0, q + adds t1, t1, t2 + umulh t2, n1, q + adc t2, t2, xzr // No carry: high of mul + {0,1} + adds t2, t2, q // add q * 1 at word 2 + adc t3, xzr, xzr + +// Compensate for 2^256 * q + + sub m3, m3, q + +// Decrement k and load the next digit (note that d aliases to q) + + sub k, k, #1 + ldr d, [x, k, lsl #3] + +// [t4;t3;t2;t1;t0] = [m3;m2;m1;m0;d] - q * n_256k1 + + adds t0, d, t0 + adcs t1, m0, t1 + adcs t2, m1, t2 + adcs t3, m2, t3 + adc t4, m3, xzr + +// Now our top word t4 is either zero or all 1s. Use it for a masked +// addition of n_256k1, which we can do by a *subtraction* of +// 2^256 - n_256k1 from our portion, re-using the constants + + and d, t4, n0 + subs m0, t0, d + and d, t4, n1 + sbcs m1, t1, d + and d, t4, #1 + sbcs m2, t2, d + sbc m3, t3, xzr + + cbnz k, Lbignum_mod_n256k1_loop + +// Finally write back [m3;m2;m1;m0] and return + +Lbignum_mod_n256k1_writeback: + stp m0, m1, [z] + stp m2, m3, [z, #16] + CFI_RET + +// Short case: just copy the input with zero-padding + +Lbignum_mod_n256k1_short: + mov m0, xzr + mov m1, xzr + mov m2, xzr + mov m3, xzr + + cbz k, Lbignum_mod_n256k1_writeback + ldr m0, [x] + subs k, k, #1 + beq Lbignum_mod_n256k1_writeback + ldr m1, [x, #8] + subs k, k, #1 + beq Lbignum_mod_n256k1_writeback + ldr m2, [x, #16] + b Lbignum_mod_n256k1_writeback + +S2N_BN_SIZE_DIRECTIVE(bignum_mod_n256k1) + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/arm/secp256k1/bignum_mod_p256k1.S b/arm/secp256k1/bignum_mod_p256k1.S new file mode 100644 index 000000000..6c18491a5 --- /dev/null +++ b/arm/secp256k1/bignum_mod_p256k1.S @@ -0,0 +1,154 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Reduce modulo field characteristic, z := x mod p_256k1 +// Input x[k]; output z[4] +// +// extern void bignum_mod_p256k1(uint64_t z[static 4], uint64_t k, +// const uint64_t *x); +// +// Standard ARM ABI: X0 = z, X1 = k, X2 = x +// ---------------------------------------------------------------------------- + +#include "_internal_s2n_bignum_arm.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_p256k1) + S2N_BN_FUNCTION_TYPE_DIRECTIVE(bignum_mod_p256k1) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_p256k1) + .text + .balign 4 + +#define z x0 +#define k x1 +#define x x2 + +#define m0 x3 +#define m1 x4 +#define m2 x5 +#define m3 x6 + +#define t0 x7 +#define t1 x8 +#define t2 x9 +#define t3 x10 +#define t4 x11 + +#define c x12 + +// These two are aliased: we only load d when finished with q + +#define q x13 +#define d x13 + + +S2N_BN_SYMBOL(bignum_mod_p256k1): + CFI_START + +// If the input is already <= 3 words long, go to a trivial "copy" path + + cmp k, #4 + bcc Lbignum_mod_p256k1_short + +// Otherwise load the top 4 digits (top-down) and reduce k by 4 + + sub k, k, #4 + lsl t0, k, #3 + add t0, t0, x + ldp m2, m3, [t0, #16] + ldp m0, m1, [t0] + +// Load the constant c = 4294968273, i.e. p_256k1 = 2^256 - c + + mov c, #977 + orr c, c, #0x100000000 + +// Reduce the top 4 digits mod p_256k1 (a conditional subtraction of p_256k1) + + adds t0, m0, c + adcs t1, m1, xzr + adcs t2, m2, xzr + adcs t3, m3, xzr + + csel m0, m0, t0, cc + csel m1, m1, t1, cc + csel m2, m2, t2, cc + csel m3, m3, t3, cc + +// Now do (k-4) iterations of 5->4 word modular reduction + + cbz k, Lbignum_mod_p256k1_writeback +Lbignum_mod_p256k1_loop: + +// Writing the input as z = 2^256 * m3 + rest, our quotient approximation is +// q = min(m3 + 1, 2^64 - 1). This is accurate because p_256k1 = 2^256 - c +// with c only 33 bits, so the true quotient is m3 or m3 + 1. + + adds q, m3, #1 + csinv q, q, xzr, cc + +// Now the quotient estimate is q, and then we do the reduction, writing +// z = [m3;m2;m1;m0], as z' = (2^256 * m3 + z) - q * p_256k1 = +// (2^256 * (m3 - q) + z) + q * c + + mul t0, q, c + umulh t1, q, c + +// Compensate for 2^256 * q + + sub m3, m3, q + +// Decrement k and load the next digit (note d aliases q) + + sub k, k, #1 + ldr d, [x, k, lsl #3] + +// [t4;t3;t2;t1;t0] = [m3;m2;m1;m0;d] - q * p_256k1 + + adds t0, d, t0 + adcs t1, m0, t1 + adcs t2, m1, xzr + adcs t3, m2, xzr + adc t4, m3, xzr + +// Now our top word t4 is either zero or all 1s. Use it for a masked +// addition of p_256k1, which we can do by a *subtraction* of c. + + and d, t4, c + subs m0, t0, d + sbcs m1, t1, xzr + sbcs m2, t2, xzr + sbc m3, t3, xzr + + cbnz k, Lbignum_mod_p256k1_loop + +// Finally write back [m3;m2;m1;m0] and return + +Lbignum_mod_p256k1_writeback: + stp m0, m1, [z] + stp m2, m3, [z, #16] + CFI_RET + +// Short case: just copy the input with zero-padding + +Lbignum_mod_p256k1_short: + mov m0, xzr + mov m1, xzr + mov m2, xzr + mov m3, xzr + + cbz k, Lbignum_mod_p256k1_writeback + ldr m0, [x] + subs k, k, #1 + beq Lbignum_mod_p256k1_writeback + ldr m1, [x, #8] + subs k, k, #1 + beq Lbignum_mod_p256k1_writeback + ldr m2, [x, #16] + b Lbignum_mod_p256k1_writeback + +S2N_BN_SIZE_DIRECTIVE(bignum_mod_p256k1) + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/benchmarks/benchmark.c b/benchmarks/benchmark.c index ae83eb5ab..3ad5dd883 100644 --- a/benchmarks/benchmark.c +++ b/benchmarks/benchmark.c @@ -808,6 +808,8 @@ void call_bignum_mod_n256_4(void) repeat(bignum_mod_n256_4(b0,b1)) void call_bignum_mod_p256_4(void) repeat(bignum_mod_p256_4(b0,b1)) void call_bignum_mod_n256k1_4(void) repeat(bignum_mod_n256k1_4(b0,b1)) void call_bignum_mod_p256k1_4(void) repeat(bignum_mod_p256k1_4(b0,b1)) +void call_bignum_mod_n256k1__8(void) repeat(bignum_mod_n256k1(b0,8,b1)) +void call_bignum_mod_p256k1__8(void) repeat(bignum_mod_p256k1(b0,8,b1)) void call_bignum_mod_n384_6(void) repeat(bignum_mod_n384_6(b0,b1)) void call_bignum_mod_p384_6(void) repeat(bignum_mod_p384_6(b0,b1)) void call_bignum_mod_n521_9(void) repeat(bignum_mod_n521_9(b0,b1)) @@ -1343,6 +1345,7 @@ int main(int argc, char *argv[]) timingtest(bmi,"bignum_mod_n256 (8 -> 4)",call_bignum_mod_n256__8); timingtest(all,"bignum_mod_n256_alt (8 -> 4)",call_bignum_mod_n256_alt__8); timingtest(all,"bignum_mod_n256_4",call_bignum_mod_n256_4); + timingtest(all,"bignum_mod_n256k1 (8 -> 4)",call_bignum_mod_n256k1__8); timingtest(all,"bignum_mod_n256k1_4",call_bignum_mod_n256k1_4); timingtest(bmi,"bignum_mod_n384 (12 -> 6)",call_bignum_mod_n384__12); timingtest(all,"bignum_mod_n384_alt (12 -> 6)",call_bignum_mod_n384_alt__12); @@ -1356,6 +1359,7 @@ int main(int argc, char *argv[]) timingtest(bmi,"bignum_mod_p256 (8 -> 4)",call_bignum_mod_p256__8); timingtest(all,"bignum_mod_p256_alt (8 -> 4)",call_bignum_mod_p256_alt__8); timingtest(all,"bignum_mod_p256_4",call_bignum_mod_p256_4); + timingtest(all,"bignum_mod_p256k1 (8 -> 4)",call_bignum_mod_p256k1__8); timingtest(all,"bignum_mod_p256k1_4",call_bignum_mod_p256k1_4); timingtest(bmi,"bignum_mod_p384 (12 -> 6)",call_bignum_mod_p384__12); timingtest(all,"bignum_mod_p384_alt (12 -> 6)",call_bignum_mod_p384_alt__12); diff --git a/include/s2n-bignum-c89.h b/include/s2n-bignum-c89.h index 2ddf0aac3..207eba072 100644 --- a/include/s2n-bignum-c89.h +++ b/include/s2n-bignum-c89.h @@ -444,6 +444,10 @@ extern void bignum_mod_n256_alt (uint64_t z[4], uint64_t k, const uint64_t *x); /* Input x[4]; output z[4] */ extern void bignum_mod_n256_4 (uint64_t z[4], const uint64_t x[4]); +/* Reduce modulo group order, z := x mod n_256k1 */ +/* Input x[k]; output z[4] */ +extern void bignum_mod_n256k1 (uint64_t z[4], uint64_t k, const uint64_t *x); + /* Reduce modulo group order, z := x mod n_256k1 */ /* Input x[4]; output z[4] */ extern void bignum_mod_n256k1_4 (uint64_t z[4], const uint64_t x[4]); @@ -484,6 +488,10 @@ extern void bignum_mod_p256_alt (uint64_t z[4], uint64_t k, const uint64_t *x); /* Input x[4]; output z[4] */ extern void bignum_mod_p256_4 (uint64_t z[4], const uint64_t x[4]); +/* Reduce modulo field characteristic, z := x mod p_256k1 */ +/* Input x[k]; output z[4] */ +extern void bignum_mod_p256k1 (uint64_t z[4], uint64_t k, const uint64_t *x); + /* Reduce modulo field characteristic, z := x mod p_256k1 */ /* Input x[4]; output z[4] */ extern void bignum_mod_p256k1_4 (uint64_t z[4], const uint64_t x[4]); diff --git a/include/s2n-bignum.h b/include/s2n-bignum.h index fb2f41818..efb763a4c 100644 --- a/include/s2n-bignum.h +++ b/include/s2n-bignum.h @@ -449,6 +449,10 @@ extern void bignum_mod_n256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, co // Input x[4]; output z[4] extern void bignum_mod_n256_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); +// Reduce modulo group order, z := x mod n_256k1 +// Input x[k]; output z[4] +extern void bignum_mod_n256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, const uint64_t *x); + // Reduce modulo group order, z := x mod n_256k1 // Input x[4]; output z[4] extern void bignum_mod_n256k1_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); @@ -489,6 +493,10 @@ extern void bignum_mod_p256_alt (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, co // Input x[4]; output z[4] extern void bignum_mod_p256_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); +// Reduce modulo field characteristic, z := x mod p_256k1 +// Input x[k]; output z[4] +extern void bignum_mod_p256k1 (uint64_t z[S2N_BIGNUM_STATIC 4], uint64_t k, const uint64_t *x); + // Reduce modulo field characteristic, z := x mod p_256k1 // Input x[4]; output z[4] extern void bignum_mod_p256k1_4 (uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]); diff --git a/tests/test.c b/tests/test.c index a7fbf298a..2be50e899 100644 --- a/tests/test.c +++ b/tests/test.c @@ -6598,6 +6598,34 @@ int test_bignum_mod_n256_4(void) return 0; } +int test_bignum_mod_n256k1(void) +{ uint64_t t, k; + printf("Testing bignum_mod_n256k1 with %d cases\n",tests); + int c; + for (t = 0; t < tests; ++t) + { k = (unsigned) rand() % MAXSIZE; + random_bignum(k,b0); + reference_copy(k,b1,4,n_256k1); + reference_mod(k,b3,b0,b1); + bignum_mod_n256k1(b4,k,b0); + c = reference_compare(k,(k < 4) ? b0 : b3,4,b4); + if (c != 0) + { printf("### Disparity: [size %4"PRIu64" -> %4"PRIu64"] " + "0x%016"PRIx64"...%016"PRIx64" mod n_256k1 = " + "0x%016"PRIx64"...%016"PRIx64" not 0x%016"PRIx64"...%016"PRIx64"\n", + k,UINT64_C(4),b0[k-1],b0[0],b4[3],b4[0],b3[3],b3[0]); + return 1; + } + else if (VERBOSE) + { printf("OK: [size %4"PRIu64" -> %4"PRIu64"] 0x%016"PRIx64"...%016"PRIx64" mod n_256k1 = " + "0x%016"PRIx64"...%016"PRIx64"\n", + k,UINT64_C(4),b0[k-1],b0[0],b4[3],b4[0]); + } + } + printf("All OK\n"); + return 0; +} + int test_bignum_mod_n256k1_4(void) { uint64_t t; printf("Testing bignum_mod_n256k1_4 with %d cases\n",tests); @@ -7015,6 +7043,34 @@ int test_bignum_mod_p256_4(void) return 0; } +int test_bignum_mod_p256k1(void) +{ uint64_t t, k; + printf("Testing bignum_mod_p256k1 with %d cases\n",tests); + int c; + for (t = 0; t < tests; ++t) + { k = (unsigned) rand() % MAXSIZE; + random_bignum(k,b0); + reference_copy(k,b1,4,p_256k1); + reference_mod(k,b3,b0,b1); + bignum_mod_p256k1(b4,k,b0); + c = reference_compare(k,(k < 4) ? b0 : b3,4,b4); + if (c != 0) + { printf("### Disparity: [size %4"PRIu64" -> %4"PRIu64"] " + "0x%016"PRIx64"...%016"PRIx64" mod p_256k1 = " + "0x%016"PRIx64"...%016"PRIx64" not 0x%016"PRIx64"...%016"PRIx64"\n", + k,UINT64_C(4),b0[k-1],b0[0],b4[3],b4[0],b3[3],b3[0]); + return 1; + } + else if (VERBOSE) + { printf("OK: [size %4"PRIu64" -> %4"PRIu64"] 0x%016"PRIx64"...%016"PRIx64" mod p_256k1 = " + "0x%016"PRIx64"...%016"PRIx64"\n", + k,UINT64_C(4),b0[k-1],b0[0],b4[3],b4[0]); + } + } + printf("All OK\n"); + return 0; +} + int test_bignum_mod_p256k1_4(void) { uint64_t t; printf("Testing bignum_mod_p256k1_4 with %d cases\n",tests); @@ -15747,6 +15803,7 @@ int main(int argc, char *argv[]) functionaltest(bmi,"bignum_mod_n256",test_bignum_mod_n256); functionaltest(all,"bignum_mod_n256_4",test_bignum_mod_n256_4); functionaltest(all,"bignum_mod_n256_alt",test_bignum_mod_n256_alt); + functionaltest(all,"bignum_mod_n256k1",test_bignum_mod_n256k1); functionaltest(all,"bignum_mod_n256k1_4",test_bignum_mod_n256k1_4); functionaltest(bmi,"bignum_mod_n384",test_bignum_mod_n384); functionaltest(all,"bignum_mod_n384_6",test_bignum_mod_n384_6); @@ -15760,6 +15817,7 @@ int main(int argc, char *argv[]) functionaltest(bmi,"bignum_mod_p256",test_bignum_mod_p256); functionaltest(all,"bignum_mod_p256_4",test_bignum_mod_p256_4); functionaltest(all,"bignum_mod_p256_alt",test_bignum_mod_p256_alt); + functionaltest(all,"bignum_mod_p256k1",test_bignum_mod_p256k1); functionaltest(all,"bignum_mod_p256k1_4",test_bignum_mod_p256k1_4); functionaltest(bmi,"bignum_mod_p384",test_bignum_mod_p384); functionaltest(all,"bignum_mod_p384_6",test_bignum_mod_p384_6); diff --git a/tools/collect-signatures.py b/tools/collect-signatures.py index 05fdbf6e5..2cd93a28a 100644 --- a/tools/collect-signatures.py +++ b/tools/collect-signatures.py @@ -295,6 +295,8 @@ def stripPrefixes(s, prefixes): "bignum_copy_row_from_table_16", "bignum_copy_row_from_table_32", "bignum_emontredc_8n_cdiff", + "bignum_mod_n256k1", + "bignum_mod_p256k1", "curve25519_x25519_byte", "curve25519_x25519_byte_alt", "sha3_", From 23ad505f3d374337bce7d036f5d05553e5782c46 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Wed, 25 Feb 2026 16:42:29 +0000 Subject: [PATCH 2/6] Add generic-size bignum_mod_p256k1 and bignum_mod_n256k1 for x86 Co-Authored-By: Claude Opus 4.6 --- x86/Makefile | 2 + x86/secp256k1/Makefile | 2 + x86/secp256k1/bignum_mod_n256k1.S | 214 ++++++++++++++++++++++++++ x86/secp256k1/bignum_mod_p256k1.S | 190 +++++++++++++++++++++++ x86_att/Makefile | 2 + x86_att/secp256k1/bignum_mod_n256k1.S | 214 ++++++++++++++++++++++++++ x86_att/secp256k1/bignum_mod_p256k1.S | 190 +++++++++++++++++++++++ 7 files changed, 814 insertions(+) create mode 100644 x86/secp256k1/bignum_mod_n256k1.S create mode 100644 x86/secp256k1/bignum_mod_p256k1.S create mode 100644 x86_att/secp256k1/bignum_mod_n256k1.S create mode 100644 x86_att/secp256k1/bignum_mod_p256k1.S diff --git a/x86/Makefile b/x86/Makefile index db5d6ec13..a49066584 100644 --- a/x86/Makefile +++ b/x86/Makefile @@ -363,7 +363,9 @@ BIGNUM_OBJ = curve25519/bignum_add_p25519.o \ secp256k1/bignum_demont_p256k1.o \ secp256k1/bignum_double_p256k1.o \ secp256k1/bignum_half_p256k1.o \ + secp256k1/bignum_mod_n256k1.o \ secp256k1/bignum_mod_n256k1_4.o \ + secp256k1/bignum_mod_p256k1.o \ secp256k1/bignum_mod_p256k1_4.o \ secp256k1/bignum_montmul_p256k1.o \ secp256k1/bignum_montmul_p256k1_alt.o \ diff --git a/x86/secp256k1/Makefile b/x86/secp256k1/Makefile index f1a95c3ec..3c384e253 100644 --- a/x86/secp256k1/Makefile +++ b/x86/secp256k1/Makefile @@ -12,7 +12,9 @@ OBJ = bignum_add_p256k1.o \ bignum_demont_p256k1.o \ bignum_double_p256k1.o \ bignum_half_p256k1.o \ + bignum_mod_n256k1.o \ bignum_mod_n256k1_4.o \ + bignum_mod_p256k1.o \ bignum_mod_p256k1_4.o \ bignum_montmul_p256k1.o \ bignum_montmul_p256k1_alt.o \ diff --git a/x86/secp256k1/bignum_mod_n256k1.S b/x86/secp256k1/bignum_mod_n256k1.S new file mode 100644 index 000000000..e25477d37 --- /dev/null +++ b/x86/secp256k1/bignum_mod_n256k1.S @@ -0,0 +1,214 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Reduce modulo group order, z := x mod n_256k1 +// Input x[k]; output z[4] +// +// extern void bignum_mod_n256k1(uint64_t z[static 4], uint64_t k, +// const uint64_t *x); +// +// Reduction is modulo the group order of the secp256k1 curve. +// +// Standard x86-64 ABI: RDI = z, RSI = k, RDX = x +// Microsoft x64 ABI: RCX = z, RDX = k, R8 = x +// ---------------------------------------------------------------------------- + +#include "_internal_s2n_bignum_x86.h" + + .intel_syntax noprefix + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n256k1) + S2N_BN_FUNCTION_TYPE_DIRECTIVE(bignum_mod_n256k1) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n256k1) + .text + +#define z rdi +#define k rsi +#define x rcx + +#define m0 r8 +#define m1 r9 +#define m2 r10 +#define m3 r11 +#define d r12 + +#define q rbx +#define t r13 + +S2N_BN_SYMBOL(bignum_mod_n256k1): + CFI_START + _CET_ENDBR + +#if WINDOWS_ABI + CFI_PUSH(rdi) + CFI_PUSH(rsi) + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 +#endif + +// Save extra registers + + CFI_PUSH(rbx) + CFI_PUSH(r12) + CFI_PUSH(r13) + +// If the input is already <= 3 words long, go to a trivial "copy" path + + cmp k, 4 + jc Lbignum_mod_n256k1_shortinput + +// Otherwise load the top 4 digits (top-down) and reduce k by 4 + + sub k, 4 + mov m3, [rdx+8*k+24] + mov m2, [rdx+8*k+16] + mov m1, [rdx+8*k+8] + mov m0, [rdx+8*k] + +// Move x into another register to leave rdx free for multiplies + + mov x, rdx + +// Reduce the top 4 digits mod n_256k1 (conditional subtraction of n_256k1). +// We add 2^256 - n_256k1 = [0; 1; 0x4551231950b75fc4; 0x402da1732fc9bebf]; +// if this carries, value >= n_256k1 so keep the reduced value. + + mov rax, 0x402da1732fc9bebf + mov q, 0x4551231950b75fc4 + + add m0, rax + adc m1, q + adc m2, 1 + adc m3, 0 + sbb d, d + not d + and rax, d + and q, d + and d, 1 + sub m0, rax + sbb m1, q + sbb m2, d + sbb m3, 0 + +// Now do (k-4) iterations of 5->4 word modular reduction + + test k, k + jz Lbignum_mod_n256k1_writeback + +Lbignum_mod_n256k1_loop: + +// Writing the input as z = 2^256 * m3 + rest, our quotient approximation is +// q = min(m3 + 1, 2^64 - 1). This is accurate because 2^256 - n_256k1 is +// only about 2^129, so the true quotient is m3 or m3 + 1. + + mov rax, m3 + add rax, 1 + sbb rdx, rdx + or rax, rdx + mov q, rax + +// Load the next digit so current m to reduce = [m3;m2;m1;m0;d] + + mov d, [x+8*k-8] + +// Compensate m3 for 2^256 * q + + sub m3, q + +// Compute [t3;t2;t1;t0] = q * (2^256 - n_256k1) = q * [0; 1; n1; n0] +// First multiply: q * n0 + + mov rax, 0x402da1732fc9bebf + mul q + mov t, rdx + add d, rax + adc t, 0 + +// Second multiply: q * n1 + + mov rax, 0x4551231950b75fc4 + mul q + add t, rax + adc rdx, 0 + add rdx, q + mov eax, 0 + adc rax, 0 + +// Add product to residue [m3;m2;m1;m0] (t0 already added to d) + + add m0, t + adc m1, rdx + adc m2, rax + adc m3, 0 + +// Now our top word m3 is either zero or all 1s. Use it for a masked +// addition of n_256k1, which we can do by a *subtraction* of +// 2^256 - n_256k1 from our portion. Compute all masks first since +// AND clears CF, which would break the sub/sbb borrow chain. + + mov rax, 0x402da1732fc9bebf + and rax, m3 + mov rdx, 0x4551231950b75fc4 + and rdx, m3 + mov t, 1 + and t, m3 + + sub d, rax + sbb m0, rdx + sbb m1, t + sbb m2, 0 + +// Shuffle registers up and loop + + mov m3, m2 + mov m2, m1 + mov m1, m0 + mov m0, d + + dec k + jnz Lbignum_mod_n256k1_loop + +// Write back + +Lbignum_mod_n256k1_writeback: + + mov [z], m0 + mov [z+8], m1 + mov [z+16], m2 + mov [z+24], m3 + +// Restore registers and return + + CFI_POP(r13) + CFI_POP(r12) + CFI_POP(rbx) +#if WINDOWS_ABI + CFI_POP(rsi) + CFI_POP(rdi) +#endif + CFI_RET + +Lbignum_mod_n256k1_shortinput: + + xor m0, m0 + xor m1, m1 + xor m2, m2 + xor m3, m3 + + test k, k + jz Lbignum_mod_n256k1_writeback + mov m0, [rdx] + dec k + jz Lbignum_mod_n256k1_writeback + mov m1, [rdx + 8] + dec k + jz Lbignum_mod_n256k1_writeback + mov m2, [rdx + 16] + jmp Lbignum_mod_n256k1_writeback + +S2N_BN_SIZE_DIRECTIVE(bignum_mod_n256k1) + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/x86/secp256k1/bignum_mod_p256k1.S b/x86/secp256k1/bignum_mod_p256k1.S new file mode 100644 index 000000000..8ba379971 --- /dev/null +++ b/x86/secp256k1/bignum_mod_p256k1.S @@ -0,0 +1,190 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Reduce modulo field characteristic, z := x mod p_256k1 +// Input x[k]; output z[4] +// +// extern void bignum_mod_p256k1(uint64_t z[static 4], uint64_t k, +// const uint64_t *x); +// +// Standard x86-64 ABI: RDI = z, RSI = k, RDX = x +// Microsoft x64 ABI: RCX = z, RDX = k, R8 = x +// ---------------------------------------------------------------------------- + +#include "_internal_s2n_bignum_x86.h" + + .intel_syntax noprefix + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_p256k1) + S2N_BN_FUNCTION_TYPE_DIRECTIVE(bignum_mod_p256k1) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_p256k1) + .text + +#define z rdi +#define k rsi +#define x rcx + +#define m0 r8 +#define m1 r9 +#define m2 r10 +#define m3 r11 +#define d r12 + +#define c rbx + +S2N_BN_SYMBOL(bignum_mod_p256k1): + CFI_START + _CET_ENDBR + +#if WINDOWS_ABI + CFI_PUSH(rdi) + CFI_PUSH(rsi) + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 +#endif + +// Save extra registers + + CFI_PUSH(rbx) + CFI_PUSH(r12) + +// If the input is already <= 3 words long, go to a trivial "copy" path + + cmp k, 4 + jc Lbignum_mod_p256k1_shortinput + +// Otherwise load the top 4 digits (top-down) and reduce k by 4 + + sub k, 4 + mov m3, [rdx+8*k+24] + mov m2, [rdx+8*k+16] + mov m1, [rdx+8*k+8] + mov m0, [rdx+8*k] + +// Move x into another register to leave rdx free for multiplies + + mov x, rdx + +// Load the constant c = 4294968273, i.e. p_256k1 = 2^256 - c + + mov c, 0x1000003D1 + +// Reduce the top 4 digits mod p_256k1 (conditional subtraction of p_256k1). +// We add c = 2^256 - p_256k1; if this carries, value >= p_256k1 so keep. + + add m0, c + adc m1, 0 + adc m2, 0 + adc m3, 0 + +// If no carry (CF=0), the value was < p_256k1, so undo the addition + + sbb rax, rax + not rax + and rax, c + sub m0, rax + sbb m1, 0 + sbb m2, 0 + sbb m3, 0 + +// Now do (k-4) iterations of 5->4 word modular reduction + + test k, k + jz Lbignum_mod_p256k1_writeback + +Lbignum_mod_p256k1_loop: + +// Writing the input as z = 2^256 * m3 + rest, our quotient approximation is +// q = min(m3 + 1, 2^64 - 1). This is accurate because p_256k1 = 2^256 - c +// with c only 33 bits, so the true quotient is m3 or m3 + 1. + + mov rax, m3 + add rax, 1 + sbb rdx, rdx + or rax, rdx + +// Load the next digit so current m to reduce = [m3;m2;m1;m0;d] + + mov d, [x+8*k-8] + +// Now form [m3;m2;m1;m0;d] = m - q * p_256k1 +// = m - q * (2^256 - c) = m - q*2^256 + q*c +// First compensate for the 2^256 * q term + + sub m3, rax + +// Now [rdx;rax] = q * c (mul uses rax as implicit source, q is still in rax) + + mul c + +// [m3;m2;m1;m0;d] += [0;0;0;rdx;rax] + + add d, rax + adc m0, rdx + adc m1, 0 + adc m2, 0 + adc m3, 0 + +// Now our top word m3 is either zero or all 1s. Use it for a masked +// subtraction of c (equivalently a masked addition of p_256k1) + + mov rax, c + and rax, m3 + sub d, rax + sbb m0, 0 + sbb m1, 0 + sbb m2, 0 + +// Shuffle registers up and loop + + mov m3, m2 + mov m2, m1 + mov m1, m0 + mov m0, d + + dec k + jnz Lbignum_mod_p256k1_loop + +// Write back + +Lbignum_mod_p256k1_writeback: + + mov [z], m0 + mov [z+8], m1 + mov [z+16], m2 + mov [z+24], m3 + +// Restore registers and return + + CFI_POP(r12) + CFI_POP(rbx) +#if WINDOWS_ABI + CFI_POP(rsi) + CFI_POP(rdi) +#endif + CFI_RET + +Lbignum_mod_p256k1_shortinput: + + xor m0, m0 + xor m1, m1 + xor m2, m2 + xor m3, m3 + + test k, k + jz Lbignum_mod_p256k1_writeback + mov m0, [rdx] + dec k + jz Lbignum_mod_p256k1_writeback + mov m1, [rdx + 8] + dec k + jz Lbignum_mod_p256k1_writeback + mov m2, [rdx + 16] + jmp Lbignum_mod_p256k1_writeback + +S2N_BN_SIZE_DIRECTIVE(bignum_mod_p256k1) + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/x86_att/Makefile b/x86_att/Makefile index 567cd246a..ba3fd7a9b 100644 --- a/x86_att/Makefile +++ b/x86_att/Makefile @@ -279,7 +279,9 @@ OBJ = curve25519/bignum_add_p25519.o \ secp256k1/bignum_demont_p256k1.o \ secp256k1/bignum_double_p256k1.o \ secp256k1/bignum_half_p256k1.o \ + secp256k1/bignum_mod_n256k1.o \ secp256k1/bignum_mod_n256k1_4.o \ + secp256k1/bignum_mod_p256k1.o \ secp256k1/bignum_mod_p256k1_4.o \ secp256k1/bignum_montmul_p256k1.o \ secp256k1/bignum_montmul_p256k1_alt.o \ diff --git a/x86_att/secp256k1/bignum_mod_n256k1.S b/x86_att/secp256k1/bignum_mod_n256k1.S new file mode 100644 index 000000000..d74247e9a --- /dev/null +++ b/x86_att/secp256k1/bignum_mod_n256k1.S @@ -0,0 +1,214 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Reduce modulo group order, z := x mod n_256k1 +// Input x[k]; output z[4] +// +// extern void bignum_mod_n256k1(uint64_t z[static 4], uint64_t k, +// const uint64_t *x); +// +// Reduction is modulo the group order of the secp256k1 curve. +// +// Standard x86-64 ABI: RDI = z, RSI = k, RDX = x +// Microsoft x64 ABI: RCX = z, RDX = k, R8 = x +// ---------------------------------------------------------------------------- + +#include "_internal_s2n_bignum_x86_att.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_n256k1) + S2N_BN_FUNCTION_TYPE_DIRECTIVE(bignum_mod_n256k1) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_n256k1) + .text + +#define z %rdi +#define k %rsi +#define x %rcx + +#define m0 %r8 +#define m1 %r9 +#define m2 %r10 +#define m3 %r11 +#define d %r12 + +#define q %rbx +#define t %r13 + +S2N_BN_SYMBOL(bignum_mod_n256k1): + CFI_START + _CET_ENDBR + +#if WINDOWS_ABI + CFI_PUSH(%rdi) + CFI_PUSH(%rsi) + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + +// Save extra registers + + CFI_PUSH(%rbx) + CFI_PUSH(%r12) + CFI_PUSH(%r13) + +// If the input is already <= 3 words long, go to a trivial "copy" path + + cmpq $4, k + jc Lbignum_mod_n256k1_shortinput + +// Otherwise load the top 4 digits (top-down) and reduce k by 4 + + subq $4, k + movq 24(%rdx,k,8), m3 + movq 16(%rdx,k,8), m2 + movq 8(%rdx,k,8), m1 + movq (%rdx,k,8), m0 + +// Move x into another register to leave %rdx free for multiplies + + movq %rdx, x + +// Reduce the top 4 digits mod n_256k1 (conditional subtraction of n_256k1). +// We add 2^256 - n_256k1 = [0; 1; 0x4551231950b75fc4; 0x402da1732fc9bebf]; +// if this carries, value >= n_256k1 so keep the reduced value. + + movq $0x402da1732fc9bebf, %rax + movq $0x4551231950b75fc4, q + + addq %rax, m0 + adcq q, m1 + adcq $1, m2 + adcq $0, m3 + sbbq d, d + notq d + andq d, %rax + andq d, q + andq $1, d + subq %rax, m0 + sbbq q, m1 + sbbq d, m2 + sbbq $0, m3 + +// Now do (k-4) iterations of 5->4 word modular reduction + + testq k, k + jz Lbignum_mod_n256k1_writeback + +Lbignum_mod_n256k1_loop: + +// Writing the input as z = 2^256 * m3 + rest, our quotient approximation is +// q = min(m3 + 1, 2^64 - 1). This is accurate because 2^256 - n_256k1 is +// only about 2^129, so the true quotient is m3 or m3 + 1. + + movq m3, %rax + addq $1, %rax + sbbq %rdx, %rdx + orq %rdx, %rax + movq %rax, q + +// Load the next digit so current m to reduce = [m3;m2;m1;m0;d] + + movq -8(x,k,8), d + +// Compensate m3 for 2^256 * q + + subq q, m3 + +// Compute [t3;t2;t1;t0] = q * (2^256 - n_256k1) = q * [0; 1; n1; n0] +// First multiply: q * n0 + + movq $0x402da1732fc9bebf, %rax + mulq q + movq %rdx, t + addq %rax, d + adcq $0, t + +// Second multiply: q * n1 + + movq $0x4551231950b75fc4, %rax + mulq q + addq %rax, t + adcq $0, %rdx + addq q, %rdx + movl $0, %eax + adcq $0, %rax + +// Add product to residue [m3;m2;m1;m0] (t0 already added to d) + + addq t, m0 + adcq %rdx, m1 + adcq %rax, m2 + adcq $0, m3 + +// Now our top word m3 is either zero or all 1s. Use it for a masked +// addition of n_256k1, which we can do by a *subtraction* of +// 2^256 - n_256k1 from our portion. Compute all masks first since +// AND clears CF, which would break the sub/sbb borrow chain. + + movq $0x402da1732fc9bebf, %rax + andq m3, %rax + movq $0x4551231950b75fc4, %rdx + andq m3, %rdx + movq $1, t + andq m3, t + + subq %rax, d + sbbq %rdx, m0 + sbbq t, m1 + sbbq $0, m2 + +// Shuffle registers up and loop + + movq m2, m3 + movq m1, m2 + movq m0, m1 + movq d, m0 + + decq k + jnz Lbignum_mod_n256k1_loop + +// Write back + +Lbignum_mod_n256k1_writeback: + + movq m0, (z) + movq m1, 8(z) + movq m2, 16(z) + movq m3, 24(z) + +// Restore registers and return + + CFI_POP(%r13) + CFI_POP(%r12) + CFI_POP(%rbx) +#if WINDOWS_ABI + CFI_POP(%rsi) + CFI_POP(%rdi) +#endif + CFI_RET + +Lbignum_mod_n256k1_shortinput: + + xorq m0, m0 + xorq m1, m1 + xorq m2, m2 + xorq m3, m3 + + testq k, k + jz Lbignum_mod_n256k1_writeback + movq (%rdx), m0 + decq k + jz Lbignum_mod_n256k1_writeback + movq 8(%rdx), m1 + decq k + jz Lbignum_mod_n256k1_writeback + movq 16(%rdx), m2 + jmp Lbignum_mod_n256k1_writeback + +S2N_BN_SIZE_DIRECTIVE(bignum_mod_n256k1) + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/x86_att/secp256k1/bignum_mod_p256k1.S b/x86_att/secp256k1/bignum_mod_p256k1.S new file mode 100644 index 000000000..6a078971d --- /dev/null +++ b/x86_att/secp256k1/bignum_mod_p256k1.S @@ -0,0 +1,190 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Reduce modulo field characteristic, z := x mod p_256k1 +// Input x[k]; output z[4] +// +// extern void bignum_mod_p256k1(uint64_t z[static 4], uint64_t k, +// const uint64_t *x); +// +// Standard x86-64 ABI: RDI = z, RSI = k, RDX = x +// Microsoft x64 ABI: RCX = z, RDX = k, R8 = x +// ---------------------------------------------------------------------------- + +#include "_internal_s2n_bignum_x86_att.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mod_p256k1) + S2N_BN_FUNCTION_TYPE_DIRECTIVE(bignum_mod_p256k1) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mod_p256k1) + .text + +#define z %rdi +#define k %rsi +#define x %rcx + +#define m0 %r8 +#define m1 %r9 +#define m2 %r10 +#define m3 %r11 +#define d %r12 + +#define c %rbx + +S2N_BN_SYMBOL(bignum_mod_p256k1): + CFI_START + _CET_ENDBR + +#if WINDOWS_ABI + CFI_PUSH(%rdi) + CFI_PUSH(%rsi) + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx +#endif + +// Save extra registers + + CFI_PUSH(%rbx) + CFI_PUSH(%r12) + +// If the input is already <= 3 words long, go to a trivial "copy" path + + cmpq $4, k + jc Lbignum_mod_p256k1_shortinput + +// Otherwise load the top 4 digits (top-down) and reduce k by 4 + + subq $4, k + movq 24(%rdx,k,8), m3 + movq 16(%rdx,k,8), m2 + movq 8(%rdx,k,8), m1 + movq (%rdx,k,8), m0 + +// Move x into another register to leave %rdx free for multiplies + + movq %rdx, x + +// Load the constant c = 4294968273, i.e. p_256k1 = 2^256 - c + + movq $0x1000003D1, c + +// Reduce the top 4 digits mod p_256k1 (conditional subtraction of p_256k1). +// We add c = 2^256 - p_256k1; if this carries, value >= p_256k1 so keep. + + addq c, m0 + adcq $0, m1 + adcq $0, m2 + adcq $0, m3 + +// If no carry (CF=0), the value was < p_256k1, so undo the addition + + sbbq %rax, %rax + notq %rax + andq c, %rax + subq %rax, m0 + sbbq $0, m1 + sbbq $0, m2 + sbbq $0, m3 + +// Now do (k-4) iterations of 5->4 word modular reduction + + testq k, k + jz Lbignum_mod_p256k1_writeback + +Lbignum_mod_p256k1_loop: + +// Writing the input as z = 2^256 * m3 + rest, our quotient approximation is +// q = min(m3 + 1, 2^64 - 1). This is accurate because p_256k1 = 2^256 - c +// with c only 33 bits, so the true quotient is m3 or m3 + 1. + + movq m3, %rax + addq $1, %rax + sbbq %rdx, %rdx + orq %rdx, %rax + +// Load the next digit so current m to reduce = [m3;m2;m1;m0;d] + + movq -8(x,k,8), d + +// Now form [m3;m2;m1;m0;d] = m - q * p_256k1 +// = m - q * (2^256 - c) = m - q*2^256 + q*c +// First compensate for the 2^256 * q term + + subq %rax, m3 + +// Now [%rdx;%rax] = q * c (mul uses %rax as implicit source, q is still in %rax) + + mulq c + +// [m3;m2;m1;m0;d] += [0;0;0;%rdx;%rax] + + addq %rax, d + adcq %rdx, m0 + adcq $0, m1 + adcq $0, m2 + adcq $0, m3 + +// Now our top word m3 is either zero or all 1s. Use it for a masked +// subtraction of c (equivalently a masked addition of p_256k1) + + movq c, %rax + andq m3, %rax + subq %rax, d + sbbq $0, m0 + sbbq $0, m1 + sbbq $0, m2 + +// Shuffle registers up and loop + + movq m2, m3 + movq m1, m2 + movq m0, m1 + movq d, m0 + + decq k + jnz Lbignum_mod_p256k1_loop + +// Write back + +Lbignum_mod_p256k1_writeback: + + movq m0, (z) + movq m1, 8(z) + movq m2, 16(z) + movq m3, 24(z) + +// Restore registers and return + + CFI_POP(%r12) + CFI_POP(%rbx) +#if WINDOWS_ABI + CFI_POP(%rsi) + CFI_POP(%rdi) +#endif + CFI_RET + +Lbignum_mod_p256k1_shortinput: + + xorq m0, m0 + xorq m1, m1 + xorq m2, m2 + xorq m3, m3 + + testq k, k + jz Lbignum_mod_p256k1_writeback + movq (%rdx), m0 + decq k + jz Lbignum_mod_p256k1_writeback + movq 8(%rdx), m1 + decq k + jz Lbignum_mod_p256k1_writeback + movq 16(%rdx), m2 + jmp Lbignum_mod_p256k1_writeback + +S2N_BN_SIZE_DIRECTIVE(bignum_mod_p256k1) + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif From 1b4fc31954825df1de43407052dd9dc99b86ffdc Mon Sep 17 00:00:00 2001 From: John Harrison Date: Wed, 25 Feb 2026 18:56:07 +0000 Subject: [PATCH 3/6] Add x86 correctness proofs for bignum_mod_p256k1 and bignum_mod_n256k1 HOL Light formal verification proofs for the x86-64 implementations of reduction modulo the secp256k1 field characteristic (p_256k1) and group order (n_256k1). Both proofs cover standard and Windows ABI variants with IBT/non-IBT subroutine wrappers. Co-Authored-By: Claude Opus 4.6 --- arm/proofs/specifications.txt | 2 + x86/proofs/bignum_mod_n256k1.ml | 547 ++++++++++++++++++++++++++++++++ x86/proofs/bignum_mod_p256k1.ml | 518 ++++++++++++++++++++++++++++++ x86/proofs/specifications.txt | 8 + 4 files changed, 1075 insertions(+) create mode 100644 x86/proofs/bignum_mod_n256k1.ml create mode 100644 x86/proofs/bignum_mod_p256k1.ml diff --git a/arm/proofs/specifications.txt b/arm/proofs/specifications.txt index c5fc448bd..6adc73324 100644 --- a/arm/proofs/specifications.txt +++ b/arm/proofs/specifications.txt @@ -136,6 +136,7 @@ BIGNUM_MOD_N25519_4_SUBROUTINE_CORRECT BIGNUM_MOD_N25519_SUBROUTINE_CORRECT BIGNUM_MOD_N25519_SUBROUTINE_SAFE BIGNUM_MOD_N256K1_4_SUBROUTINE_CORRECT +BIGNUM_MOD_N256K1_SUBROUTINE_CORRECT BIGNUM_MOD_N256_4_SUBROUTINE_CORRECT BIGNUM_MOD_N256_SUBROUTINE_CORRECT BIGNUM_MOD_N384_6_SUBROUTINE_CORRECT @@ -146,6 +147,7 @@ BIGNUM_MOD_NSM2_4_SUBROUTINE_CORRECT BIGNUM_MOD_NSM2_SUBROUTINE_CORRECT BIGNUM_MOD_P25519_4_SUBROUTINE_CORRECT BIGNUM_MOD_P256K1_4_SUBROUTINE_CORRECT +BIGNUM_MOD_P256K1_SUBROUTINE_CORRECT BIGNUM_MOD_P256_4_SUBROUTINE_CORRECT BIGNUM_MOD_P256_SUBROUTINE_CORRECT BIGNUM_MOD_P384_6_SUBROUTINE_CORRECT diff --git a/x86/proofs/bignum_mod_n256k1.ml b/x86/proofs/bignum_mod_n256k1.ml new file mode 100644 index 000000000..00a2ad578 --- /dev/null +++ b/x86/proofs/bignum_mod_n256k1.ml @@ -0,0 +1,547 @@ +(* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +(* ========================================================================= *) +(* Reduction modulo n_256k1, the order of the secp256k1 curve. *) +(* ========================================================================= *) + +needs "x86/proofs/base.ml";; + +(**** print_literal_from_elf "x86/secp256k1/bignum_mod_n256k1.o";; + ****) + +let bignum_mod_n256k1_mc = + define_assert_from_elf "bignum_mod_n256k1_mc" "x86/secp256k1/bignum_mod_n256k1.o" +[ + 0xf3; 0x0f; 0x1e; 0xfa; (* ENDBR64 *) + 0x53; (* PUSH (% rbx) *) + 0x41; 0x54; (* PUSH (% r12) *) + 0x41; 0x55; (* PUSH (% r13) *) + 0x48; 0x83; 0xfe; 0x04; (* CMP (% rsi) (Imm8 (word 4)) *) + 0x0f; 0x82; 0x19; 0x01; 0x00; 0x00; + (* JB (Imm32 (word 281)) *) + 0x48; 0x83; 0xee; 0x04; (* SUB (% rsi) (Imm8 (word 4)) *) + 0x4c; 0x8b; 0x5c; 0xf2; 0x18; + (* MOV (% r11) (Memop Quadword (%%%% (rdx,3,rsi,&24))) *) + 0x4c; 0x8b; 0x54; 0xf2; 0x10; + (* MOV (% r10) (Memop Quadword (%%%% (rdx,3,rsi,&16))) *) + 0x4c; 0x8b; 0x4c; 0xf2; 0x08; + (* MOV (% r9) (Memop Quadword (%%%% (rdx,3,rsi,&8))) *) + 0x4c; 0x8b; 0x04; 0xf2; (* MOV (% r8) (Memop Quadword (%%% (rdx,3,rsi))) *) + 0x48; 0x89; 0xd1; (* MOV (% rcx) (% rdx) *) + 0x48; 0xb8; 0xbf; 0xbe; 0xc9; 0x2f; 0x73; 0xa1; 0x2d; 0x40; + (* MOV (% rax) (Imm64 (word 4624529908474429119)) *) + 0x48; 0xbb; 0xc4; 0x5f; 0xb7; 0x50; 0x19; 0x23; 0x51; 0x45; + (* MOV (% rbx) (Imm64 (word 4994812053365940164)) *) + 0x49; 0x01; 0xc0; (* ADD (% r8) (% rax) *) + 0x49; 0x11; 0xd9; (* ADC (% r9) (% rbx) *) + 0x49; 0x83; 0xd2; 0x01; (* ADC (% r10) (Imm8 (word 1)) *) + 0x49; 0x83; 0xd3; 0x00; (* ADC (% r11) (Imm8 (word 0)) *) + 0x4d; 0x19; 0xe4; (* SBB (% r12) (% r12) *) + 0x49; 0xf7; 0xd4; (* NOT (% r12) *) + 0x4c; 0x21; 0xe0; (* AND (% rax) (% r12) *) + 0x4c; 0x21; 0xe3; (* AND (% rbx) (% r12) *) + 0x49; 0x83; 0xe4; 0x01; (* AND (% r12) (Imm8 (word 1)) *) + 0x49; 0x29; 0xc0; (* SUB (% r8) (% rax) *) + 0x49; 0x19; 0xd9; (* SBB (% r9) (% rbx) *) + 0x4d; 0x19; 0xe2; (* SBB (% r10) (% r12) *) + 0x49; 0x83; 0xdb; 0x00; (* SBB (% r11) (Imm8 (word 0)) *) + 0x48; 0x85; 0xf6; (* TEST (% rsi) (% rsi) *) + 0x0f; 0x84; 0xa2; 0x00; 0x00; 0x00; + (* JE (Imm32 (word 162)) *) + 0x4c; 0x89; 0xd8; (* MOV (% rax) (% r11) *) + 0x48; 0x83; 0xc0; 0x01; (* ADD (% rax) (Imm8 (word 1)) *) + 0x48; 0x19; 0xd2; (* SBB (% rdx) (% rdx) *) + 0x48; 0x09; 0xd0; (* OR (% rax) (% rdx) *) + 0x48; 0x89; 0xc3; (* MOV (% rbx) (% rax) *) + 0x4c; 0x8b; 0x64; 0xf1; 0xf8; + (* MOV (% r12) (Memop Quadword (%%%% (rcx,3,rsi,-- &8))) *) + 0x49; 0x29; 0xdb; (* SUB (% r11) (% rbx) *) + 0x48; 0xb8; 0xbf; 0xbe; 0xc9; 0x2f; 0x73; 0xa1; 0x2d; 0x40; + (* MOV (% rax) (Imm64 (word 4624529908474429119)) *) + 0x48; 0xf7; 0xe3; (* MUL2 (% rdx,% rax) (% rbx) *) + 0x49; 0x89; 0xd5; (* MOV (% r13) (% rdx) *) + 0x49; 0x01; 0xc4; (* ADD (% r12) (% rax) *) + 0x49; 0x83; 0xd5; 0x00; (* ADC (% r13) (Imm8 (word 0)) *) + 0x48; 0xb8; 0xc4; 0x5f; 0xb7; 0x50; 0x19; 0x23; 0x51; 0x45; + (* MOV (% rax) (Imm64 (word 4994812053365940164)) *) + 0x48; 0xf7; 0xe3; (* MUL2 (% rdx,% rax) (% rbx) *) + 0x49; 0x01; 0xc5; (* ADD (% r13) (% rax) *) + 0x48; 0x83; 0xd2; 0x00; (* ADC (% rdx) (Imm8 (word 0)) *) + 0x48; 0x01; 0xda; (* ADD (% rdx) (% rbx) *) + 0xb8; 0x00; 0x00; 0x00; 0x00; + (* MOV (% eax) (Imm32 (word 0)) *) + 0x48; 0x83; 0xd0; 0x00; (* ADC (% rax) (Imm8 (word 0)) *) + 0x4d; 0x01; 0xe8; (* ADD (% r8) (% r13) *) + 0x49; 0x11; 0xd1; (* ADC (% r9) (% rdx) *) + 0x49; 0x11; 0xc2; (* ADC (% r10) (% rax) *) + 0x49; 0x83; 0xd3; 0x00; (* ADC (% r11) (Imm8 (word 0)) *) + 0x48; 0xb8; 0xbf; 0xbe; 0xc9; 0x2f; 0x73; 0xa1; 0x2d; 0x40; + (* MOV (% rax) (Imm64 (word 4624529908474429119)) *) + 0x4c; 0x21; 0xd8; (* AND (% rax) (% r11) *) + 0x48; 0xba; 0xc4; 0x5f; 0xb7; 0x50; 0x19; 0x23; 0x51; 0x45; + (* MOV (% rdx) (Imm64 (word 4994812053365940164)) *) + 0x4c; 0x21; 0xda; (* AND (% rdx) (% r11) *) + 0x49; 0xc7; 0xc5; 0x01; 0x00; 0x00; 0x00; + (* MOV (% r13) (Imm32 (word 1)) *) + 0x4d; 0x21; 0xdd; (* AND (% r13) (% r11) *) + 0x49; 0x29; 0xc4; (* SUB (% r12) (% rax) *) + 0x49; 0x19; 0xd0; (* SBB (% r8) (% rdx) *) + 0x4d; 0x19; 0xe9; (* SBB (% r9) (% r13) *) + 0x49; 0x83; 0xda; 0x00; (* SBB (% r10) (Imm8 (word 0)) *) + 0x4d; 0x89; 0xd3; (* MOV (% r11) (% r10) *) + 0x4d; 0x89; 0xca; (* MOV (% r10) (% r9) *) + 0x4d; 0x89; 0xc1; (* MOV (% r9) (% r8) *) + 0x4d; 0x89; 0xe0; (* MOV (% r8) (% r12) *) + 0x48; 0xff; 0xce; (* DEC (% rsi) *) + 0x0f; 0x85; 0x5e; 0xff; 0xff; 0xff; + (* JNE (Imm32 (word 4294967134)) *) + 0x4c; 0x89; 0x07; (* MOV (Memop Quadword (%% (rdi,0))) (% r8) *) + 0x4c; 0x89; 0x4f; 0x08; (* MOV (Memop Quadword (%% (rdi,8))) (% r9) *) + 0x4c; 0x89; 0x57; 0x10; (* MOV (Memop Quadword (%% (rdi,16))) (% r10) *) + 0x4c; 0x89; 0x5f; 0x18; (* MOV (Memop Quadword (%% (rdi,24))) (% r11) *) + 0x41; 0x5d; (* POP (% r13) *) + 0x41; 0x5c; (* POP (% r12) *) + 0x5b; (* POP (% rbx) *) + 0xc3; (* RET *) + 0x4d; 0x31; 0xc0; (* XOR (% r8) (% r8) *) + 0x4d; 0x31; 0xc9; (* XOR (% r9) (% r9) *) + 0x4d; 0x31; 0xd2; (* XOR (% r10) (% r10) *) + 0x4d; 0x31; 0xdb; (* XOR (% r11) (% r11) *) + 0x48; 0x85; 0xf6; (* TEST (% rsi) (% rsi) *) + 0x74; 0xda; (* JE (Imm8 (word 218)) *) + 0x4c; 0x8b; 0x02; (* MOV (% r8) (Memop Quadword (%% (rdx,0))) *) + 0x48; 0xff; 0xce; (* DEC (% rsi) *) + 0x74; 0xd2; (* JE (Imm8 (word 210)) *) + 0x4c; 0x8b; 0x4a; 0x08; (* MOV (% r9) (Memop Quadword (%% (rdx,8))) *) + 0x48; 0xff; 0xce; (* DEC (% rsi) *) + 0x74; 0xc9; (* JE (Imm8 (word 201)) *) + 0x4c; 0x8b; 0x52; 0x10; (* MOV (% r10) (Memop Quadword (%% (rdx,16))) *) + 0xeb; 0xc3 (* JMP (Imm8 (word 195)) *) +];; + +let bignum_mod_n256k1_tmc = define_trimmed "bignum_mod_n256k1_tmc" bignum_mod_n256k1_mc;; + +let BIGNUM_MOD_N256K1_EXEC = X86_MK_EXEC_RULE bignum_mod_n256k1_tmc;; + +(* ------------------------------------------------------------------------- *) +(* Common tactic for slightly different standard and Windows variants. *) +(* ------------------------------------------------------------------------- *) + +let n_256k1 = new_definition `n_256k1 = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141`;; + +let n256k1longredlemma = prove + (`!n. n < 2 EXP 64 * n_256k1 + ==> let q = MIN (n DIV 2 EXP 256 + 1) (2 EXP 64 - 1) in + q < 2 EXP 64 /\ + q * n_256k1 <= n + n_256k1 /\ + n < q * n_256k1 + n_256k1`, + CONV_TAC(TOP_DEPTH_CONV let_CONV) THEN REWRITE_TAC[n_256k1] THEN ARITH_TAC);; + +let tac execth offset = + X_GEN_TAC `z:int64` THEN W64_GEN_TAC `k:num` THEN + MAP_EVERY X_GEN_TAC [`x:int64`; `n:num`; `pc:num`] THEN + REWRITE_TAC[NONOVERLAPPING_CLAUSES] THEN + REWRITE_TAC[C_ARGUMENTS; C_RETURN; SOME_FLAGS] THEN DISCH_TAC THEN + BIGNUM_TERMRANGE_TAC `k:num` `n:num` THEN + + (*** Case split over the k < 4 case, which is a different path ***) + + ASM_CASES_TAC `k < 4` THENL + [SUBGOAL_THEN `n MOD n_256k1 = n` SUBST1_TAC THENL + [MATCH_MP_TAC MOD_LT THEN + TRANS_TAC LTE_TRANS `2 EXP (64 * k)` THEN ASM_REWRITE_TAC[] THEN + TRANS_TAC LE_TRANS `2 EXP (64 * 3)` THEN + ASM_REWRITE_TAC[LE_EXP; n_256k1] THEN CONV_TAC NUM_REDUCE_CONV THEN + ASM_ARITH_TAC; + ALL_TAC] THEN + REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN ENSURES_INIT_TAC "s0" THEN + BIGNUM_DIGITIZE_TAC "n_" `read(memory :> bytes(x,8 * 4)) s0` THEN + FIRST_X_ASSUM(MP_TAC o MATCH_MP (ARITH_RULE + `k < 4 ==> k = 0 \/ k = 1 \/ k = 2 \/ k = 3`)) THEN + DISCH_THEN(REPEAT_TCL DISJ_CASES_THEN SUBST_ALL_TAC) THEN + EXPAND_TAC "n" THEN CONV_TAC(ONCE_DEPTH_CONV BIGNUM_EXPAND_CONV) THEN + ASM_REWRITE_TAC[] THENL + [X86_STEPS_TAC execth (1--12); + X86_STEPS_TAC execth (1--15); + X86_STEPS_TAC execth (1--18); + X86_STEPS_TAC execth (1--20)] THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[VAL_WORD_0] THEN + ARITH_TAC; + FIRST_ASSUM(ASSUME_TAC o GEN_REWRITE_RULE I [NOT_LT])] THEN + + (*** Initial 4-digit modulus ***) + + ENSURES_SEQUENCE_TAC (offset 0x68) + `\s. bignum_from_memory(x,k) s = n /\ + read RDI s = z /\ + read RCX s = x /\ + read RSI s = word(k - 4) /\ + bignum_of_wordlist[read R8 s; read R9 s; read R10 s; read R11 s] = + (highdigits n (k - 4)) MOD n_256k1` THEN + CONJ_TAC THENL + [ABBREV_TAC `j = k - 4` THEN VAL_INT64_TAC `j:num` THEN + SUBGOAL_THEN `word_sub (word k) (word 4):int64 = word j` ASSUME_TAC THENL + [SUBST1_TAC(SYM(ASSUME `k - 4 = j`)) THEN + REWRITE_TAC[WORD_SUB] THEN ASM_REWRITE_TAC[]; + ALL_TAC] THEN + REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN ENSURES_INIT_TAC "s0" THEN + EXPAND_TAC "n" THEN REWRITE_TAC[highdigits] THEN + REWRITE_TAC[GSYM BIGNUM_FROM_MEMORY_BYTES; BIGNUM_FROM_MEMORY_DIV] THEN + ASM_REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN + SUBST1_TAC(SYM(ASSUME `k - 4 = j`)) THEN + ASM_SIMP_TAC[ARITH_RULE `4 <= k ==> k - (k - 4) = 4`] THEN + ABBREV_TAC `m = bignum_from_memory(word_add x (word (8 * j)),4) s0` THEN + SUBGOAL_THEN `m < 2 EXP (64 * 4)` ASSUME_TAC THENL + [EXPAND_TAC "m" THEN REWRITE_TAC[BIGNUM_FROM_MEMORY_BOUND]; ALL_TAC] THEN + RULE_ASSUM_TAC(CONV_RULE(ONCE_DEPTH_CONV BIGNUM_EXPAND_CONV)) THEN + BIGNUM_DIGITIZE_TAC "m_" + `read (memory :> bytes (word_add x (word(8 * j)),8 * 4)) s0` THEN + X86_ACCSTEPS_TAC execth [11;12;13;14] (1--14) THEN + SUBGOAL_THEN `carry_s14 <=> n_256k1 <= m` SUBST_ALL_TAC THENL + [MATCH_MP_TAC FLAG_FROM_CARRY_LE THEN EXISTS_TAC `256` THEN + EXPAND_TAC "m" THEN REWRITE_TAC[n_256k1; GSYM REAL_OF_NUM_CLAUSES] THEN + ACCUMULATOR_ASSUM_LIST(MP_TAC o end_itlist CONJ o DECARRY_RULE) THEN + DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN BOUNDER_TAC[]; + ALL_TAC] THEN + X86_ACCSTEPS_TAC execth [20;21;22;23] (15--23) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + REWRITE_TAC[bignum_of_wordlist; ADD_CLAUSES; MULT_CLAUSES] THEN + ASM_REWRITE_TAC[] THEN DISCARD_STATE_TAC "s23" THEN + W(MP_TAC o PART_MATCH (lhand o rand) MOD_CASES o rand o snd) THEN + ANTS_TAC THENL + [TRANS_TAC LTE_TRANS `2 EXP (64 * 4)` THEN + ASM_REWRITE_TAC[n_256k1] THEN CONV_TAC NUM_REDUCE_CONV; + DISCH_THEN SUBST1_TAC] THEN + REWRITE_TAC[GSYM NOT_LE; COND_SWAP] THEN + REWRITE_TAC[GSYM REAL_OF_NUM_EQ; GSYM REAL_OF_NUM_ADD] THEN + ONCE_REWRITE_TAC[COND_RAND] THEN SIMP_TAC[GSYM REAL_OF_NUM_SUB] THEN + REWRITE_TAC[GSYM REAL_OF_NUM_CLAUSES] THEN REWRITE_TAC[REAL_OF_NUM_LE] THEN + MATCH_MP_TAC EQUAL_FROM_CONGRUENT_REAL THEN + MAP_EVERY EXISTS_TAC [`256`; `&0:real`] THEN ASM_REWRITE_TAC[] THEN + CONJ_TAC THENL [BOUNDER_TAC[]; ALL_TAC] THEN CONJ_TAC THENL + [UNDISCH_TAC `m < 2 EXP (64 * 4)` THEN REWRITE_TAC[n_256k1] THEN + POP_ASSUM_LIST(K ALL_TAC) THEN CONV_TAC NUM_REDUCE_CONV THEN + REWRITE_TAC[GSYM REAL_OF_NUM_LE; GSYM REAL_OF_NUM_LT] THEN + REAL_ARITH_TAC; + ALL_TAC] THEN + CONJ_TAC THENL [REAL_INTEGER_TAC; ALL_TAC] THEN EXPAND_TAC "m" THEN + REWRITE_TAC[GSYM REAL_OF_NUM_ADD] THEN ASM_REWRITE_TAC[] THEN + REWRITE_TAC[GSYM REAL_OF_NUM_MUL; GSYM REAL_OF_NUM_POW] THEN + ACCUMULATOR_POP_ASSUM_LIST(MP_TAC o end_itlist CONJ o DESUM_RULE) THEN + REWRITE_TAC[REAL_BITVAL_NOT; n_256k1] THEN + DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN REWRITE_TAC[WORD_AND_MASK] THEN + COND_CASES_TAC THEN ASM_REWRITE_TAC[BITVAL_CLAUSES] THEN + CONV_TAC WORD_REDUCE_CONV THEN REAL_INTEGER_TAC; + ALL_TAC] THEN + + (*** Finish off the k = 4 case which is now just the writeback ***) + + FIRST_ASSUM(DISJ_CASES_THEN2 SUBST_ALL_TAC ASSUME_TAC o MATCH_MP (ARITH_RULE + `4 <= k ==> k = 4 \/ 4 < k`)) + THENL + [GHOST_INTRO_TAC `d0:int64` `read R8` THEN + GHOST_INTRO_TAC `d1:int64` `read R9` THEN + GHOST_INTRO_TAC `d2:int64` `read R10` THEN + GHOST_INTRO_TAC `d3:int64` `read R11` THEN + REWRITE_TAC[SUB_REFL; HIGHDIGITS_0] THEN + ENSURES_INIT_TAC "s0" THEN X86_STEPS_TAC execth (1--6) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + FIRST_X_ASSUM(fun th -> GEN_REWRITE_TAC RAND_CONV [SYM th]) THEN + REWRITE_TAC[bignum_of_wordlist] THEN + CONV_TAC(LAND_CONV BIGNUM_EXPAND_CONV) THEN + ASM_REWRITE_TAC[] THEN ARITH_TAC; + ALL_TAC] THEN + + SUBGOAL_THEN `0 < k - 4 /\ ~(k - 4 = 0)` STRIP_ASSUME_TAC THENL + [SIMPLE_ARITH_TAC; ALL_TAC] THEN + + (*** Setup of loop invariant ***) + + ENSURES_WHILE_PDOWN_TAC `k - 4` (offset 0x71) (offset 0x10d) + `\i s. (bignum_from_memory(x,k) s = n /\ + read RDI s = z /\ + read RCX s = x /\ + read RSI s = word i /\ + bignum_of_wordlist[read R8 s; read R9 s; read R10 s; read R11 s] = + (highdigits n i) MOD n_256k1) /\ + (read ZF s <=> i = 0)` THEN + ASM_REWRITE_TAC[] THEN REPEAT CONJ_TAC THENL + [VAL_INT64_TAC `k - 4` THEN REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN + ENSURES_INIT_TAC "s0" THEN X86_STEPS_TAC execth (1--2) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[]; + ALL_TAC; (*** Main loop invariant ***) + X_GEN_TAC `i:num` THEN STRIP_TAC THEN VAL_INT64_TAC `i:num` THEN + ASM_REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN + ENSURES_INIT_TAC "s0" THEN X86_STEPS_TAC execth [1] THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[]; + GHOST_INTRO_TAC `d0:int64` `read R8` THEN + GHOST_INTRO_TAC `d1:int64` `read R9` THEN + GHOST_INTRO_TAC `d2:int64` `read R10` THEN + GHOST_INTRO_TAC `d3:int64` `read R11` THEN + REWRITE_TAC[SUB_REFL; HIGHDIGITS_0] THEN + ENSURES_INIT_TAC "s0" THEN X86_STEPS_TAC execth (1--5) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + FIRST_X_ASSUM(fun th -> GEN_REWRITE_TAC RAND_CONV [SYM th]) THEN + REWRITE_TAC[bignum_of_wordlist] THEN + CONV_TAC(LAND_CONV BIGNUM_EXPAND_CONV) THEN + ASM_REWRITE_TAC[] THEN ARITH_TAC] THEN + + (*** Mathematics of main loop with decomposition and quotient estimate ***) + + X_GEN_TAC `i:num` THEN STRIP_TAC THEN VAL_INT64_TAC `i:num` THEN + GHOST_INTRO_TAC `m1:int64` `read R8` THEN + GHOST_INTRO_TAC `m2:int64` `read R9` THEN + GHOST_INTRO_TAC `m3:int64` `read R10` THEN + GHOST_INTRO_TAC `m4:int64` `read R11` THEN + GLOBALIZE_PRECONDITION_TAC THEN ASM_REWRITE_TAC[] THEN + ABBREV_TAC `m0:int64 = word(bigdigit n i)` THEN + ABBREV_TAC `m = bignum_of_wordlist[m0; m1; m2; m3; m4]` THEN + SUBGOAL_THEN `m < 2 EXP 64 * n_256k1` ASSUME_TAC THENL + [EXPAND_TAC "m" THEN ONCE_REWRITE_TAC[bignum_of_wordlist] THEN + MP_TAC(SPEC `m0:int64` VAL_BOUND_64) THEN + ASM_REWRITE_TAC[n_256k1] THEN ARITH_TAC; + ALL_TAC] THEN + SUBGOAL_THEN `highdigits n i MOD n_256k1 = m MOD n_256k1` SUBST1_TAC THENL + [ONCE_REWRITE_TAC[HIGHDIGITS_STEP] THEN + EXPAND_TAC "m" THEN ONCE_REWRITE_TAC[bignum_of_wordlist] THEN + EXPAND_TAC "m0" THEN + SIMP_TAC[VAL_WORD_EQ; DIMINDEX_64; BIGDIGIT_BOUND] THEN + ASM_REWRITE_TAC[] THEN CONV_TAC MOD_DOWN_CONV THEN + AP_THM_TAC THEN AP_TERM_TAC THEN ARITH_TAC; + ALL_TAC] THEN + MP_TAC(SPEC `m:num` n256k1longredlemma) THEN ASM_REWRITE_TAC[] THEN + LET_TAC THEN STRIP_TAC THEN + + (*** The computation of the quotient estimate q ***) + + ASM_REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN ENSURES_INIT_TAC "s0" THEN + X86_ACCSTEPS_TAC execth [2] (1--2) THEN + X86_STEPS_TAC execth (3--5) THEN + FIRST_X_ASSUM(MP_TAC o SPEC `word q:int64` o MATCH_MP (MESON[] + `!q. read RAX s = q' ==> q' = q ==> read RAX s = q`)) THEN + ANTS_TAC THENL + [EXPAND_TAC "q" THEN + EXPAND_TAC "m" THEN + CONV_TAC(ONCE_DEPTH_CONV BIGNUM_OF_WORDLIST_DIV_CONV) THEN + ASM_REWRITE_TAC[bignum_of_wordlist; ADD_CLAUSES; MULT_CLAUSES] THEN + REWRITE_TAC[WORD_OR_MASK] THEN + ACCUMULATOR_POP_ASSUM_LIST(MP_TAC o end_itlist CONJ) THEN + REWRITE_TAC[REAL_OF_NUM_CLAUSES] THEN DISCH_TAC THEN + COND_CASES_TAC THEN ASM_REWRITE_TAC[BITVAL_CLAUSES] THENL + [REWRITE_TAC[ARITH_RULE + `MIN (2 EXP 64 + a) (2 EXP 64 - 1) = 2 EXP 64 - 1`] THEN + CONV_TAC NUM_REDUCE_CONV THEN CONV_TAC WORD_REDUCE_CONV; + SIMP_TAC[VAL_BOUND_64; ARITH_RULE + `n < 2 EXP 64 ==> MIN n (2 EXP 64 - 1) = n`] THEN + GEN_REWRITE_TAC LAND_CONV [GSYM WORD_VAL] THEN + AP_TERM_TAC THEN ASM_ARITH_TAC]; + DISCH_TAC THEN VAL_INT64_TAC `q:num`] THEN + + (*** The next digit in the current state ***) + + VAL_INT64_TAC `i + 1` THEN + ASSUME_TAC(SPEC `i:num` WORD_INDEX_WRAP) THEN + SUBGOAL_THEN `i:num < k` ASSUME_TAC THENL [SIMPLE_ARITH_TAC; ALL_TAC] THEN + MP_TAC(SPECL [`k:num`; `x:int64`; `s5:x86state`; `i:num`] + BIGDIGIT_BIGNUM_FROM_MEMORY) THEN + ASM_REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN + DISCH_THEN(MP_TAC o AP_TERM `word:num->int64` o SYM) THEN + ASM_REWRITE_TAC[WORD_VAL] THEN DISCH_TAC THEN + + (*** A bit of fiddle to make the accumulation tactics work ***) + + ABBREV_TAC `w:int64 = word q` THEN + UNDISCH_THEN `val(w:int64) = q` (SUBST_ALL_TAC o SYM) THEN + ACCUMULATOR_POP_ASSUM_LIST(K ALL_TAC o end_itlist CONJ) THEN + + (*** Subtraction of q * n_256k1 ***) + + X86_ACCSTEPS_TAC execth [7;9;11;12;14;15;16;17;19;20;21;22;23] (6--23) THEN + + SUBGOAL_THEN + `sum_s23:int64 = word_neg(word(bitval(m < val(w:int64) * n_256k1))) /\ + &(bignum_of_wordlist[sum_s11; sum_s20; sum_s21; sum_s22]) = + if m < val w * n_256k1 then &m - &(val w * n_256k1) + &2 pow 256 + else &m - &(val w * n_256k1)` + STRIP_ASSUME_TAC THENL + [MATCH_MP_TAC MASK_AND_VALUE_FROM_CARRY_LT THEN CONJ_TAC THENL + [MAP_EVERY UNDISCH_TAC + [`val(w:int64) * n_256k1 <= m + n_256k1`; + `m < val(w:int64) * n_256k1 + n_256k1`] THEN + REWRITE_TAC[n_256k1; GSYM REAL_OF_NUM_CLAUSES] THEN REAL_ARITH_TAC; + ASM_REWRITE_TAC[bignum_of_wordlist; GSYM REAL_OF_NUM_CLAUSES]] THEN + CONJ_TAC THENL [BOUNDER_TAC[]; CONV_TAC(ONCE_DEPTH_CONV NUM_ADD_CONV)] THEN + ASM_REWRITE_TAC[] THEN EXPAND_TAC "m" THEN + REWRITE_TAC[bignum_of_wordlist; GSYM REAL_OF_NUM_CLAUSES; n_256k1] THEN + ACCUMULATOR_ASSUM_LIST(MP_TAC o end_itlist CONJ o DESUM_RULE) THEN + DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN + CONV_TAC(RAND_CONV REAL_POLY_CONV) THEN + REPEAT(MATCH_MP_TAC INTEGER_ADD THEN CONJ_TAC) THEN + TRY REAL_INTEGER_TAC THEN + ACCUMULATOR_POP_ASSUM_LIST(MP_TAC o end_itlist CONJ o DECARRY_RULE) THEN + DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN REAL_INTEGER_TAC; + ACCUMULATOR_POP_ASSUM_LIST(K ALL_TAC)] THEN + + (*** Final correction ***) + + X86_ACCSTEPS_TAC execth [30;31;32;33] (24--38) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + + MATCH_MP_TAC(TAUT `p /\ (p ==> q) ==> p /\ q`) THEN + CONJ_TAC THENL [CONV_TAC WORD_RULE; DISCH_THEN SUBST1_TAC] THEN + ASM_REWRITE_TAC[] THEN + + CONV_TAC SYM_CONV THEN MATCH_MP_TAC MOD_UNIQ_BALANCED_REAL THEN + MAP_EVERY EXISTS_TAC [`val(w:int64)`; `256`] THEN + ASM_REWRITE_TAC[] THEN + ABBREV_TAC `b <=> m < val(w:int64) * n_256k1` THEN + REWRITE_TAC[REAL_ARITH + `m - s - (w - b) * n:real = (m - w * n) + (b * n - s)`] THEN + FIRST_ASSUM(MP_TAC o MATCH_MP (MESON[REAL_ADD_RID] + `x = (if p then y + z else y) ==> x = y + (if p then z else &0)`)) THEN + REWRITE_TAC[GSYM REAL_OF_NUM_CLAUSES; bignum_of_wordlist; n_256k1] THEN + GEN_REWRITE_TAC LAND_CONV [REAL_ARITH `x:real = y + z <=> y = x - z`] THEN + DISCH_THEN SUBST1_TAC THEN + CONJ_TAC THENL [ARITH_TAC; ALL_TAC] THEN + REWRITE_TAC[bignum_of_wordlist; GSYM REAL_OF_NUM_CLAUSES] THEN + CONJ_TAC THENL [BOUNDER_TAC[]; ALL_TAC] THEN + ACCUMULATOR_ASSUM_LIST(MP_TAC o end_itlist CONJ o DESUM_RULE) THEN + DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN POP_ASSUM_LIST(K ALL_TAC) THEN + COND_CASES_TAC THEN ASM_REWRITE_TAC[BITVAL_CLAUSES] THEN + CONV_TAC WORD_REDUCE_CONV THEN REAL_INTEGER_TAC;; + +(* ------------------------------------------------------------------------- *) +(* Correctness of standard ABI version. *) +(* ------------------------------------------------------------------------- *) + +let BIGNUM_MOD_N256K1_CORRECT = time prove + (`!z k x n pc. + nonoverlapping (word pc,0x150) (z,32) + ==> ensures x86 + (\s. bytes_loaded s (word pc) bignum_mod_n256k1_tmc /\ + read RIP s = word(pc + 0x5) /\ + C_ARGUMENTS [z; k; x] s /\ + bignum_from_memory (x,val k) s = n) + (\s. read RIP s = word (pc + 0x122) /\ + bignum_from_memory (z,4) s = n MOD n_256k1) + (MAYCHANGE [RIP; RSI; RAX; RDX; RCX; RBX; R8; R9; R10; R11; + R12; R13] ,, + MAYCHANGE SOME_FLAGS ,, MAYCHANGE [events] ,, + MAYCHANGE [memory :> bignum(z,4)])`, + tac BIGNUM_MOD_N256K1_EXEC (curry mk_comb `(+) (pc:num)` o mk_small_numeral));; + +let BIGNUM_MOD_N256K1_NOIBT_SUBROUTINE_CORRECT = time prove + (`!z k x n pc stackpointer returnaddress. + nonoverlapping (word_sub stackpointer (word 24),32) (z,32) /\ + ALL (nonoverlapping (word_sub stackpointer (word 24),24)) + [(word pc,LENGTH bignum_mod_n256k1_tmc); (x, 8 * val k)] /\ + nonoverlapping (word pc,LENGTH bignum_mod_n256k1_tmc) (z,32) + ==> ensures x86 + (\s. bytes_loaded s (word pc) bignum_mod_n256k1_tmc /\ + read RIP s = word pc /\ + read RSP s = stackpointer /\ + read (memory :> bytes64 stackpointer) s = returnaddress /\ + C_ARGUMENTS [z; k; x] s /\ + bignum_from_memory (x,val k) s = n) + (\s. read RIP s = returnaddress /\ + read RSP s = word_add stackpointer (word 8) /\ + bignum_from_memory (z,4) s = n MOD n_256k1) + (MAYCHANGE [RSP] ,, MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bignum(z,4); + memory :> bytes(word_sub stackpointer (word 24),24)])`, + X86_ADD_RETURN_STACK_TAC BIGNUM_MOD_N256K1_EXEC BIGNUM_MOD_N256K1_CORRECT + `[RBX; R12; R13]` 24);; + +let BIGNUM_MOD_N256K1_SUBROUTINE_CORRECT = time prove + (`!z k x n pc stackpointer returnaddress. + nonoverlapping (word_sub stackpointer (word 24),32) (z,32) /\ + ALL (nonoverlapping (word_sub stackpointer (word 24),24)) + [(word pc,LENGTH bignum_mod_n256k1_mc); (x, 8 * val k)] /\ + nonoverlapping (word pc,LENGTH bignum_mod_n256k1_mc) (z,32) + ==> ensures x86 + (\s. bytes_loaded s (word pc) bignum_mod_n256k1_mc /\ + read RIP s = word pc /\ + read RSP s = stackpointer /\ + read (memory :> bytes64 stackpointer) s = returnaddress /\ + C_ARGUMENTS [z; k; x] s /\ + bignum_from_memory (x,val k) s = n) + (\s. read RIP s = returnaddress /\ + read RSP s = word_add stackpointer (word 8) /\ + bignum_from_memory (z,4) s = n MOD n_256k1) + (MAYCHANGE [RSP] ,, MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bignum(z,4); + memory :> bytes(word_sub stackpointer (word 24),24)])`, + MATCH_ACCEPT_TAC(ADD_IBT_RULE BIGNUM_MOD_N256K1_NOIBT_SUBROUTINE_CORRECT));; + +(* ------------------------------------------------------------------------- *) +(* Correctness of Windows ABI version. *) +(* ------------------------------------------------------------------------- *) + +let bignum_mod_n256k1_windows_mc = define_from_elf + "bignum_mod_n256k1_windows_mc" "x86/secp256k1/bignum_mod_n256k1.obj";; + +let bignum_mod_n256k1_windows_tmc = define_trimmed "bignum_mod_n256k1_windows_tmc" bignum_mod_n256k1_windows_mc;; + +let BIGNUM_MOD_N256K1_WINDOWS_CORRECT = time prove + (`!z k x n pc. + nonoverlapping (word pc,0x15d) (z,32) + ==> ensures x86 + (\s. bytes_loaded s (word pc) bignum_mod_n256k1_windows_tmc /\ + read RIP s = word(pc + 0x10) /\ + C_ARGUMENTS [z; k; x] s /\ + bignum_from_memory (x,val k) s = n) + (\s. read RIP s = word (pc + 0x12d) /\ + bignum_from_memory (z,4) s = n MOD n_256k1) + (MAYCHANGE [RIP; RSI; RAX; RDX; RCX; RBX; R8; R9; R10; R11; + R12; R13] ,, + MAYCHANGE SOME_FLAGS ,, MAYCHANGE [events] ,, + MAYCHANGE [memory :> bignum(z,4)])`, + tac (X86_MK_EXEC_RULE bignum_mod_n256k1_windows_tmc) + (curry mk_comb `(+) (pc:num)` o mk_small_numeral o (fun n -> n+11)));; + +let BIGNUM_MOD_N256K1_NOIBT_WINDOWS_SUBROUTINE_CORRECT = time prove + (`!z k x n pc stackpointer returnaddress. + nonoverlapping (word_sub stackpointer (word 40),48) (z,32) /\ + ALL (nonoverlapping (word_sub stackpointer (word 40),40)) + [(word pc,LENGTH bignum_mod_n256k1_windows_tmc); (x, 8 * val k)] /\ + nonoverlapping (word pc,LENGTH bignum_mod_n256k1_windows_tmc) (z,32) + ==> ensures x86 + (\s. bytes_loaded s (word pc) bignum_mod_n256k1_windows_tmc /\ + read RIP s = word pc /\ + read RSP s = stackpointer /\ + read (memory :> bytes64 stackpointer) s = returnaddress /\ + WINDOWS_C_ARGUMENTS [z; k; x] s /\ + bignum_from_memory (x,val k) s = n) + (\s. read RIP s = returnaddress /\ + read RSP s = word_add stackpointer (word 8) /\ + bignum_from_memory (z,4) s = n MOD n_256k1) + (MAYCHANGE [RSP] ,, WINDOWS_MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bignum(z,4); + memory :> bytes(word_sub stackpointer (word 40),40)])`, + GEN_X86_ADD_RETURN_STACK_TAC (X86_MK_EXEC_RULE bignum_mod_n256k1_windows_tmc) + BIGNUM_MOD_N256K1_WINDOWS_CORRECT + `[RDI; RSI; RBX; R12; R13]` 40 (8,6));; + +let BIGNUM_MOD_N256K1_WINDOWS_SUBROUTINE_CORRECT = time prove + (`!z k x n pc stackpointer returnaddress. + nonoverlapping (word_sub stackpointer (word 40),48) (z,32) /\ + ALL (nonoverlapping (word_sub stackpointer (word 40),40)) + [(word pc,LENGTH bignum_mod_n256k1_windows_mc); (x, 8 * val k)] /\ + nonoverlapping (word pc,LENGTH bignum_mod_n256k1_windows_mc) (z,32) + ==> ensures x86 + (\s. bytes_loaded s (word pc) bignum_mod_n256k1_windows_mc /\ + read RIP s = word pc /\ + read RSP s = stackpointer /\ + read (memory :> bytes64 stackpointer) s = returnaddress /\ + WINDOWS_C_ARGUMENTS [z; k; x] s /\ + bignum_from_memory (x,val k) s = n) + (\s. read RIP s = returnaddress /\ + read RSP s = word_add stackpointer (word 8) /\ + bignum_from_memory (z,4) s = n MOD n_256k1) + (MAYCHANGE [RSP] ,, WINDOWS_MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bignum(z,4); + memory :> bytes(word_sub stackpointer (word 40),40)])`, + MATCH_ACCEPT_TAC(ADD_IBT_RULE BIGNUM_MOD_N256K1_NOIBT_WINDOWS_SUBROUTINE_CORRECT));; diff --git a/x86/proofs/bignum_mod_p256k1.ml b/x86/proofs/bignum_mod_p256k1.ml new file mode 100644 index 000000000..a7bbfb0ab --- /dev/null +++ b/x86/proofs/bignum_mod_p256k1.ml @@ -0,0 +1,518 @@ +(* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +(* ========================================================================= *) +(* Reduction modulo p_256k1, the field characteristic for secp256k1. *) +(* ========================================================================= *) + +needs "x86/proofs/base.ml";; + +(**** print_literal_from_elf "x86/secp256k1/bignum_mod_p256k1.o";; + ****) + +let bignum_mod_p256k1_mc = + define_assert_from_elf "bignum_mod_p256k1_mc" "x86/secp256k1/bignum_mod_p256k1.o" +[ + 0xf3; 0x0f; 0x1e; 0xfa; (* ENDBR64 *) + 0x53; (* PUSH (% rbx) *) + 0x41; 0x54; (* PUSH (% r12) *) + 0x48; 0x83; 0xfe; 0x04; (* CMP (% rsi) (Imm8 (word 4)) *) + 0x0f; 0x82; 0xb3; 0x00; 0x00; 0x00; + (* JB (Imm32 (word 179)) *) + 0x48; 0x83; 0xee; 0x04; (* SUB (% rsi) (Imm8 (word 4)) *) + 0x4c; 0x8b; 0x5c; 0xf2; 0x18; + (* MOV (% r11) (Memop Quadword (%%%% (rdx,3,rsi,&24))) *) + 0x4c; 0x8b; 0x54; 0xf2; 0x10; + (* MOV (% r10) (Memop Quadword (%%%% (rdx,3,rsi,&16))) *) + 0x4c; 0x8b; 0x4c; 0xf2; 0x08; + (* MOV (% r9) (Memop Quadword (%%%% (rdx,3,rsi,&8))) *) + 0x4c; 0x8b; 0x04; 0xf2; (* MOV (% r8) (Memop Quadword (%%% (rdx,3,rsi))) *) + 0x48; 0x89; 0xd1; (* MOV (% rcx) (% rdx) *) + 0x48; 0xbb; 0xd1; 0x03; 0x00; 0x00; 0x01; 0x00; 0x00; 0x00; + (* MOV (% rbx) (Imm64 (word 4294968273)) *) + 0x49; 0x01; 0xd8; (* ADD (% r8) (% rbx) *) + 0x49; 0x83; 0xd1; 0x00; (* ADC (% r9) (Imm8 (word 0)) *) + 0x49; 0x83; 0xd2; 0x00; (* ADC (% r10) (Imm8 (word 0)) *) + 0x49; 0x83; 0xd3; 0x00; (* ADC (% r11) (Imm8 (word 0)) *) + 0x48; 0x19; 0xc0; (* SBB (% rax) (% rax) *) + 0x48; 0xf7; 0xd0; (* NOT (% rax) *) + 0x48; 0x21; 0xd8; (* AND (% rax) (% rbx) *) + 0x49; 0x29; 0xc0; (* SUB (% r8) (% rax) *) + 0x49; 0x83; 0xd9; 0x00; (* SBB (% r9) (Imm8 (word 0)) *) + 0x49; 0x83; 0xda; 0x00; (* SBB (% r10) (Imm8 (word 0)) *) + 0x49; 0x83; 0xdb; 0x00; (* SBB (% r11) (Imm8 (word 0)) *) + 0x48; 0x85; 0xf6; (* TEST (% rsi) (% rsi) *) + 0x74; 0x50; (* JE (Imm8 (word 80)) *) + 0x4c; 0x89; 0xd8; (* MOV (% rax) (% r11) *) + 0x48; 0x83; 0xc0; 0x01; (* ADD (% rax) (Imm8 (word 1)) *) + 0x48; 0x19; 0xd2; (* SBB (% rdx) (% rdx) *) + 0x48; 0x09; 0xd0; (* OR (% rax) (% rdx) *) + 0x4c; 0x8b; 0x64; 0xf1; 0xf8; + (* MOV (% r12) (Memop Quadword (%%%% (rcx,3,rsi,-- &8))) *) + 0x49; 0x29; 0xc3; (* SUB (% r11) (% rax) *) + 0x48; 0xf7; 0xe3; (* MUL2 (% rdx,% rax) (% rbx) *) + 0x49; 0x01; 0xc4; (* ADD (% r12) (% rax) *) + 0x49; 0x11; 0xd0; (* ADC (% r8) (% rdx) *) + 0x49; 0x83; 0xd1; 0x00; (* ADC (% r9) (Imm8 (word 0)) *) + 0x49; 0x83; 0xd2; 0x00; (* ADC (% r10) (Imm8 (word 0)) *) + 0x49; 0x83; 0xd3; 0x00; (* ADC (% r11) (Imm8 (word 0)) *) + 0x48; 0x89; 0xd8; (* MOV (% rax) (% rbx) *) + 0x4c; 0x21; 0xd8; (* AND (% rax) (% r11) *) + 0x49; 0x29; 0xc4; (* SUB (% r12) (% rax) *) + 0x49; 0x83; 0xd8; 0x00; (* SBB (% r8) (Imm8 (word 0)) *) + 0x49; 0x83; 0xd9; 0x00; (* SBB (% r9) (Imm8 (word 0)) *) + 0x49; 0x83; 0xda; 0x00; (* SBB (% r10) (Imm8 (word 0)) *) + 0x4d; 0x89; 0xd3; (* MOV (% r11) (% r10) *) + 0x4d; 0x89; 0xca; (* MOV (% r10) (% r9) *) + 0x4d; 0x89; 0xc1; (* MOV (% r9) (% r8) *) + 0x4d; 0x89; 0xe0; (* MOV (% r8) (% r12) *) + 0x48; 0xff; 0xce; (* DEC (% rsi) *) + 0x75; 0xb0; (* JNE (Imm8 (word 176)) *) + 0x4c; 0x89; 0x07; (* MOV (Memop Quadword (%% (rdi,0))) (% r8) *) + 0x4c; 0x89; 0x4f; 0x08; (* MOV (Memop Quadword (%% (rdi,8))) (% r9) *) + 0x4c; 0x89; 0x57; 0x10; (* MOV (Memop Quadword (%% (rdi,16))) (% r10) *) + 0x4c; 0x89; 0x5f; 0x18; (* MOV (Memop Quadword (%% (rdi,24))) (% r11) *) + 0x41; 0x5c; (* POP (% r12) *) + 0x5b; (* POP (% rbx) *) + 0xc3; (* RET *) + 0x4d; 0x31; 0xc0; (* XOR (% r8) (% r8) *) + 0x4d; 0x31; 0xc9; (* XOR (% r9) (% r9) *) + 0x4d; 0x31; 0xd2; (* XOR (% r10) (% r10) *) + 0x4d; 0x31; 0xdb; (* XOR (% r11) (% r11) *) + 0x48; 0x85; 0xf6; (* TEST (% rsi) (% rsi) *) + 0x74; 0xdc; (* JE (Imm8 (word 220)) *) + 0x4c; 0x8b; 0x02; (* MOV (% r8) (Memop Quadword (%% (rdx,0))) *) + 0x48; 0xff; 0xce; (* DEC (% rsi) *) + 0x74; 0xd4; (* JE (Imm8 (word 212)) *) + 0x4c; 0x8b; 0x4a; 0x08; (* MOV (% r9) (Memop Quadword (%% (rdx,8))) *) + 0x48; 0xff; 0xce; (* DEC (% rsi) *) + 0x74; 0xcb; (* JE (Imm8 (word 203)) *) + 0x4c; 0x8b; 0x52; 0x10; (* MOV (% r10) (Memop Quadword (%% (rdx,16))) *) + 0xeb; 0xc5 (* JMP (Imm8 (word 197)) *) +];; + +let bignum_mod_p256k1_tmc = define_trimmed "bignum_mod_p256k1_tmc" bignum_mod_p256k1_mc;; + +let BIGNUM_MOD_P256K1_EXEC = X86_MK_EXEC_RULE bignum_mod_p256k1_tmc;; + +(* ------------------------------------------------------------------------- *) +(* Common tactic for slightly different standard and Windows variants. *) +(* ------------------------------------------------------------------------- *) + +let p_256k1 = new_definition `p_256k1 = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F`;; + +let p256k1longredlemma = prove + (`!n. n < 2 EXP 64 * p_256k1 + ==> let q = MIN (n DIV 2 EXP 256 + 1) (2 EXP 64 - 1) in + q < 2 EXP 64 /\ + q * p_256k1 <= n + p_256k1 /\ + n < q * p_256k1 + p_256k1`, + CONV_TAC(TOP_DEPTH_CONV let_CONV) THEN REWRITE_TAC[p_256k1] THEN ARITH_TAC);; + +let tac execth offset = + X_GEN_TAC `z:int64` THEN W64_GEN_TAC `k:num` THEN + MAP_EVERY X_GEN_TAC [`x:int64`; `n:num`; `pc:num`] THEN + REWRITE_TAC[NONOVERLAPPING_CLAUSES] THEN + REWRITE_TAC[C_ARGUMENTS; C_RETURN; SOME_FLAGS] THEN DISCH_TAC THEN + BIGNUM_TERMRANGE_TAC `k:num` `n:num` THEN + + (*** Case split over the k < 4 case, which is a different path ***) + + ASM_CASES_TAC `k < 4` THENL + [SUBGOAL_THEN `n MOD p_256k1 = n` SUBST1_TAC THENL + [MATCH_MP_TAC MOD_LT THEN + TRANS_TAC LTE_TRANS `2 EXP (64 * k)` THEN ASM_REWRITE_TAC[] THEN + TRANS_TAC LE_TRANS `2 EXP (64 * 3)` THEN + ASM_REWRITE_TAC[LE_EXP; p_256k1] THEN CONV_TAC NUM_REDUCE_CONV THEN + ASM_ARITH_TAC; + ALL_TAC] THEN + REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN ENSURES_INIT_TAC "s0" THEN + BIGNUM_DIGITIZE_TAC "n_" `read(memory :> bytes(x,8 * 4)) s0` THEN + FIRST_X_ASSUM(MP_TAC o MATCH_MP (ARITH_RULE + `k < 4 ==> k = 0 \/ k = 1 \/ k = 2 \/ k = 3`)) THEN + DISCH_THEN(REPEAT_TCL DISJ_CASES_THEN SUBST_ALL_TAC) THEN + EXPAND_TAC "n" THEN CONV_TAC(ONCE_DEPTH_CONV BIGNUM_EXPAND_CONV) THEN + ASM_REWRITE_TAC[] THENL + [X86_STEPS_TAC execth (1--12); + X86_STEPS_TAC execth (1--15); + X86_STEPS_TAC execth (1--18); + X86_STEPS_TAC execth (1--20)] THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[VAL_WORD_0] THEN + ARITH_TAC; + FIRST_ASSUM(ASSUME_TAC o GEN_REWRITE_RULE I [NOT_LT])] THEN + + (*** Initial 4-digit modulus ***) + + ENSURES_SEQUENCE_TAC (offset 0x58) + `\s. bignum_from_memory(x,k) s = n /\ + read RDI s = z /\ + read RCX s = x /\ + read RSI s = word(k - 4) /\ + read RBX s = word 0x1000003D1 /\ + bignum_of_wordlist[read R8 s; read R9 s; read R10 s; read R11 s] = + (highdigits n (k - 4)) MOD p_256k1` THEN + CONJ_TAC THENL + [ABBREV_TAC `j = k - 4` THEN VAL_INT64_TAC `j:num` THEN + SUBGOAL_THEN `word_sub (word k) (word 4):int64 = word j` ASSUME_TAC THENL + [SUBST1_TAC(SYM(ASSUME `k - 4 = j`)) THEN + REWRITE_TAC[WORD_SUB] THEN ASM_REWRITE_TAC[]; + ALL_TAC] THEN + REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN ENSURES_INIT_TAC "s0" THEN + EXPAND_TAC "n" THEN REWRITE_TAC[highdigits] THEN + REWRITE_TAC[GSYM BIGNUM_FROM_MEMORY_BYTES; BIGNUM_FROM_MEMORY_DIV] THEN + ASM_REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN + SUBST1_TAC(SYM(ASSUME `k - 4 = j`)) THEN + ASM_SIMP_TAC[ARITH_RULE `4 <= k ==> k - (k - 4) = 4`] THEN + ABBREV_TAC `m = bignum_from_memory(word_add x (word (8 * j)),4) s0` THEN + SUBGOAL_THEN `m < 2 EXP (64 * 4)` ASSUME_TAC THENL + [EXPAND_TAC "m" THEN REWRITE_TAC[BIGNUM_FROM_MEMORY_BOUND]; ALL_TAC] THEN + RULE_ASSUM_TAC(CONV_RULE(ONCE_DEPTH_CONV BIGNUM_EXPAND_CONV)) THEN + BIGNUM_DIGITIZE_TAC "m_" + `read (memory :> bytes (word_add x (word(8 * j)),8 * 4)) s0` THEN + X86_ACCSTEPS_TAC execth [10;11;12;13] (1--13) THEN + SUBGOAL_THEN `carry_s13 <=> p_256k1 <= m` SUBST_ALL_TAC THENL + [MATCH_MP_TAC FLAG_FROM_CARRY_LE THEN EXISTS_TAC `256` THEN + EXPAND_TAC "m" THEN REWRITE_TAC[p_256k1; GSYM REAL_OF_NUM_CLAUSES] THEN + ACCUMULATOR_ASSUM_LIST(MP_TAC o end_itlist CONJ o DECARRY_RULE) THEN + DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN BOUNDER_TAC[]; + ALL_TAC] THEN + X86_ACCSTEPS_TAC execth [17;18;19;20] (14--20) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + REWRITE_TAC[bignum_of_wordlist; ADD_CLAUSES; MULT_CLAUSES] THEN + ASM_REWRITE_TAC[] THEN DISCARD_STATE_TAC "s20" THEN + W(MP_TAC o PART_MATCH (lhand o rand) MOD_CASES o rand o snd) THEN + ANTS_TAC THENL + [TRANS_TAC LTE_TRANS `2 EXP (64 * 4)` THEN + ASM_REWRITE_TAC[p_256k1] THEN CONV_TAC NUM_REDUCE_CONV; + DISCH_THEN SUBST1_TAC] THEN + REWRITE_TAC[GSYM NOT_LE; COND_SWAP] THEN + REWRITE_TAC[GSYM REAL_OF_NUM_EQ; GSYM REAL_OF_NUM_ADD] THEN + ONCE_REWRITE_TAC[COND_RAND] THEN SIMP_TAC[GSYM REAL_OF_NUM_SUB] THEN + REWRITE_TAC[GSYM REAL_OF_NUM_CLAUSES] THEN REWRITE_TAC[REAL_OF_NUM_LE] THEN + MATCH_MP_TAC EQUAL_FROM_CONGRUENT_REAL THEN + MAP_EVERY EXISTS_TAC [`256`; `&0:real`] THEN ASM_REWRITE_TAC[] THEN + CONJ_TAC THENL [BOUNDER_TAC[]; ALL_TAC] THEN CONJ_TAC THENL + [UNDISCH_TAC `m < 2 EXP (64 * 4)` THEN REWRITE_TAC[p_256k1] THEN + POP_ASSUM_LIST(K ALL_TAC) THEN CONV_TAC NUM_REDUCE_CONV THEN + REWRITE_TAC[GSYM REAL_OF_NUM_LE; GSYM REAL_OF_NUM_LT] THEN + REAL_ARITH_TAC; + ALL_TAC] THEN + CONJ_TAC THENL [REAL_INTEGER_TAC; ALL_TAC] THEN EXPAND_TAC "m" THEN + REWRITE_TAC[GSYM REAL_OF_NUM_ADD] THEN ASM_REWRITE_TAC[] THEN + REWRITE_TAC[GSYM REAL_OF_NUM_MUL; GSYM REAL_OF_NUM_POW] THEN + ACCUMULATOR_POP_ASSUM_LIST(MP_TAC o end_itlist CONJ o DESUM_RULE) THEN + REWRITE_TAC[REAL_BITVAL_NOT; p_256k1] THEN + DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN REWRITE_TAC[WORD_AND_MASK] THEN + COND_CASES_TAC THEN ASM_REWRITE_TAC[BITVAL_CLAUSES] THEN + CONV_TAC WORD_REDUCE_CONV THEN REAL_INTEGER_TAC; + ALL_TAC] THEN + + (*** Finish off the k = 4 case which is now just the writeback ***) + + FIRST_ASSUM(DISJ_CASES_THEN2 SUBST_ALL_TAC ASSUME_TAC o MATCH_MP (ARITH_RULE + `4 <= k ==> k = 4 \/ 4 < k`)) + THENL + [GHOST_INTRO_TAC `d0:int64` `read R8` THEN + GHOST_INTRO_TAC `d1:int64` `read R9` THEN + GHOST_INTRO_TAC `d2:int64` `read R10` THEN + GHOST_INTRO_TAC `d3:int64` `read R11` THEN + REWRITE_TAC[SUB_REFL; HIGHDIGITS_0] THEN + ENSURES_INIT_TAC "s0" THEN X86_STEPS_TAC execth (1--6) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + FIRST_X_ASSUM(fun th -> GEN_REWRITE_TAC RAND_CONV [SYM th]) THEN + REWRITE_TAC[bignum_of_wordlist] THEN + CONV_TAC(LAND_CONV BIGNUM_EXPAND_CONV) THEN + ASM_REWRITE_TAC[] THEN ARITH_TAC; + ALL_TAC] THEN + + SUBGOAL_THEN `0 < k - 4 /\ ~(k - 4 = 0)` STRIP_ASSUME_TAC THENL + [SIMPLE_ARITH_TAC; ALL_TAC] THEN + + (*** Setup of loop invariant ***) + + ENSURES_WHILE_PDOWN_TAC `k - 4` (offset 0x5d) (offset 0xab) + `\i s. (bignum_from_memory(x,k) s = n /\ + read RDI s = z /\ + read RCX s = x /\ + read RSI s = word i /\ + read RBX s = word 0x1000003D1 /\ + bignum_of_wordlist[read R8 s; read R9 s; read R10 s; read R11 s] = + (highdigits n i) MOD p_256k1) /\ + (read ZF s <=> i = 0)` THEN + ASM_REWRITE_TAC[] THEN REPEAT CONJ_TAC THENL + [VAL_INT64_TAC `k - 4` THEN REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN + ENSURES_INIT_TAC "s0" THEN X86_STEPS_TAC execth (1--2) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[]; + ALL_TAC; (*** Main loop invariant ***) + X_GEN_TAC `i:num` THEN STRIP_TAC THEN VAL_INT64_TAC `i:num` THEN + ASM_REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN + ENSURES_INIT_TAC "s0" THEN X86_STEPS_TAC execth [1] THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[]; + GHOST_INTRO_TAC `d0:int64` `read R8` THEN + GHOST_INTRO_TAC `d1:int64` `read R9` THEN + GHOST_INTRO_TAC `d2:int64` `read R10` THEN + GHOST_INTRO_TAC `d3:int64` `read R11` THEN + REWRITE_TAC[SUB_REFL; HIGHDIGITS_0] THEN + ENSURES_INIT_TAC "s0" THEN X86_STEPS_TAC execth (1--5) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + FIRST_X_ASSUM(fun th -> GEN_REWRITE_TAC RAND_CONV [SYM th]) THEN + REWRITE_TAC[bignum_of_wordlist] THEN + CONV_TAC(LAND_CONV BIGNUM_EXPAND_CONV) THEN + ASM_REWRITE_TAC[] THEN ARITH_TAC] THEN + + (*** Mathematics of main loop with decomposition and quotient estimate ***) + + X_GEN_TAC `i:num` THEN STRIP_TAC THEN VAL_INT64_TAC `i:num` THEN + GHOST_INTRO_TAC `m1:int64` `read R8` THEN + GHOST_INTRO_TAC `m2:int64` `read R9` THEN + GHOST_INTRO_TAC `m3:int64` `read R10` THEN + GHOST_INTRO_TAC `m4:int64` `read R11` THEN + GLOBALIZE_PRECONDITION_TAC THEN ASM_REWRITE_TAC[] THEN + ABBREV_TAC `m0:int64 = word(bigdigit n i)` THEN + ABBREV_TAC `m = bignum_of_wordlist[m0; m1; m2; m3; m4]` THEN + SUBGOAL_THEN `m < 2 EXP 64 * p_256k1` ASSUME_TAC THENL + [EXPAND_TAC "m" THEN ONCE_REWRITE_TAC[bignum_of_wordlist] THEN + MP_TAC(SPEC `m0:int64` VAL_BOUND_64) THEN + ASM_REWRITE_TAC[p_256k1] THEN ARITH_TAC; + ALL_TAC] THEN + SUBGOAL_THEN `highdigits n i MOD p_256k1 = m MOD p_256k1` SUBST1_TAC THENL + [ONCE_REWRITE_TAC[HIGHDIGITS_STEP] THEN + EXPAND_TAC "m" THEN ONCE_REWRITE_TAC[bignum_of_wordlist] THEN + EXPAND_TAC "m0" THEN + SIMP_TAC[VAL_WORD_EQ; DIMINDEX_64; BIGDIGIT_BOUND] THEN + ASM_REWRITE_TAC[] THEN CONV_TAC MOD_DOWN_CONV THEN + AP_THM_TAC THEN AP_TERM_TAC THEN ARITH_TAC; + ALL_TAC] THEN + MP_TAC(SPEC `m:num` p256k1longredlemma) THEN ASM_REWRITE_TAC[] THEN + LET_TAC THEN STRIP_TAC THEN + + (*** The computation of the quotient estimate q ***) + + ASM_REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN ENSURES_INIT_TAC "s0" THEN + X86_ACCSTEPS_TAC execth [2] (1--2) THEN + X86_STEPS_TAC execth (3--4) THEN + FIRST_X_ASSUM(MP_TAC o SPEC `word q:int64` o MATCH_MP (MESON[] + `!q. read RAX s = q' ==> q' = q ==> read RAX s = q`)) THEN + ANTS_TAC THENL + [EXPAND_TAC "q" THEN + EXPAND_TAC "m" THEN + CONV_TAC(ONCE_DEPTH_CONV BIGNUM_OF_WORDLIST_DIV_CONV) THEN + ASM_REWRITE_TAC[bignum_of_wordlist; ADD_CLAUSES; MULT_CLAUSES] THEN + REWRITE_TAC[WORD_OR_MASK] THEN + ACCUMULATOR_POP_ASSUM_LIST(MP_TAC o end_itlist CONJ) THEN + REWRITE_TAC[REAL_OF_NUM_CLAUSES] THEN DISCH_TAC THEN + COND_CASES_TAC THEN ASM_REWRITE_TAC[BITVAL_CLAUSES] THENL + [REWRITE_TAC[ARITH_RULE + `MIN (2 EXP 64 + a) (2 EXP 64 - 1) = 2 EXP 64 - 1`] THEN + CONV_TAC NUM_REDUCE_CONV THEN CONV_TAC WORD_REDUCE_CONV; + SIMP_TAC[VAL_BOUND_64; ARITH_RULE + `n < 2 EXP 64 ==> MIN n (2 EXP 64 - 1) = n`] THEN + GEN_REWRITE_TAC LAND_CONV [GSYM WORD_VAL] THEN + AP_TERM_TAC THEN ASM_ARITH_TAC]; + DISCH_TAC THEN VAL_INT64_TAC `q:num`] THEN + + (*** The next digit in the current state ***) + + VAL_INT64_TAC `i + 1` THEN + ASSUME_TAC(SPEC `i:num` WORD_INDEX_WRAP) THEN + SUBGOAL_THEN `i:num < k` ASSUME_TAC THENL [SIMPLE_ARITH_TAC; ALL_TAC] THEN + MP_TAC(SPECL [`k:num`; `x:int64`; `s4:x86state`; `i:num`] + BIGDIGIT_BIGNUM_FROM_MEMORY) THEN + ASM_REWRITE_TAC[BIGNUM_FROM_MEMORY_BYTES] THEN + DISCH_THEN(MP_TAC o AP_TERM `word:num->int64` o SYM) THEN + ASM_REWRITE_TAC[WORD_VAL] THEN DISCH_TAC THEN + + (*** A bit of fiddle to make the accumulation tactics work ***) + + ABBREV_TAC `w:int64 = word q` THEN + UNDISCH_THEN `val(w:int64) = q` (SUBST_ALL_TAC o SYM) THEN + ACCUMULATOR_POP_ASSUM_LIST(K ALL_TAC o end_itlist CONJ) THEN + + (*** Subtraction of q * p_256k1 ***) + + X86_ACCSTEPS_TAC execth [6;7;8;9;10;11;12] (5--12) THEN + + SUBGOAL_THEN + `sum_s12:int64 = word_neg(word(bitval(m < val(w:int64) * p_256k1))) /\ + &(bignum_of_wordlist[sum_s8; sum_s9; sum_s10; sum_s11]) = + if m < val w * p_256k1 then &m - &(val w * p_256k1) + &2 pow 256 + else &m - &(val w * p_256k1)` + STRIP_ASSUME_TAC THENL + [MATCH_MP_TAC MASK_AND_VALUE_FROM_CARRY_LT THEN CONJ_TAC THENL + [MAP_EVERY UNDISCH_TAC + [`val(w:int64) * p_256k1 <= m + p_256k1`; + `m < val(w:int64) * p_256k1 + p_256k1`] THEN + REWRITE_TAC[p_256k1; GSYM REAL_OF_NUM_CLAUSES] THEN REAL_ARITH_TAC; + ASM_REWRITE_TAC[bignum_of_wordlist; GSYM REAL_OF_NUM_CLAUSES]] THEN + CONJ_TAC THENL [BOUNDER_TAC[]; CONV_TAC(ONCE_DEPTH_CONV NUM_ADD_CONV)] THEN + ASM_REWRITE_TAC[] THEN EXPAND_TAC "m" THEN + REWRITE_TAC[bignum_of_wordlist; GSYM REAL_OF_NUM_CLAUSES; p_256k1] THEN + ACCUMULATOR_ASSUM_LIST(MP_TAC o end_itlist CONJ o DESUM_RULE) THEN + DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN + CONV_TAC(RAND_CONV REAL_POLY_CONV) THEN + REPEAT(MATCH_MP_TAC INTEGER_ADD THEN CONJ_TAC) THEN + TRY REAL_INTEGER_TAC THEN + ACCUMULATOR_POP_ASSUM_LIST(MP_TAC o end_itlist CONJ o DECARRY_RULE) THEN + DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN REAL_INTEGER_TAC; + ACCUMULATOR_POP_ASSUM_LIST(K ALL_TAC)] THEN + + (*** Final correction ***) + + X86_ACCSTEPS_TAC execth [15;16;17;18] (13--23) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + + MATCH_MP_TAC(TAUT `p /\ (p ==> q) ==> p /\ q`) THEN + CONJ_TAC THENL [CONV_TAC WORD_RULE; DISCH_THEN SUBST1_TAC] THEN + ASM_REWRITE_TAC[] THEN + + CONV_TAC SYM_CONV THEN MATCH_MP_TAC MOD_UNIQ_BALANCED_REAL THEN + MAP_EVERY EXISTS_TAC [`val(w:int64)`; `256`] THEN + ASM_REWRITE_TAC[] THEN + ABBREV_TAC `b <=> m < val(w:int64) * p_256k1` THEN + REWRITE_TAC[REAL_ARITH + `m - s - (w - b) * n:real = (m - w * n) + (b * n - s)`] THEN + FIRST_ASSUM(MP_TAC o MATCH_MP (MESON[REAL_ADD_RID] + `x = (if p then y + z else y) ==> x = y + (if p then z else &0)`)) THEN + REWRITE_TAC[GSYM REAL_OF_NUM_CLAUSES; bignum_of_wordlist; p_256k1] THEN + GEN_REWRITE_TAC LAND_CONV [REAL_ARITH `x:real = y + z <=> y = x - z`] THEN + DISCH_THEN SUBST1_TAC THEN + CONJ_TAC THENL [ARITH_TAC; ALL_TAC] THEN + REWRITE_TAC[bignum_of_wordlist; GSYM REAL_OF_NUM_CLAUSES] THEN + CONJ_TAC THENL [BOUNDER_TAC[]; ALL_TAC] THEN + ACCUMULATOR_ASSUM_LIST(MP_TAC o end_itlist CONJ o DESUM_RULE) THEN + DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN POP_ASSUM_LIST(K ALL_TAC) THEN + COND_CASES_TAC THEN ASM_REWRITE_TAC[BITVAL_CLAUSES] THEN + CONV_TAC WORD_REDUCE_CONV THEN REAL_INTEGER_TAC;; + +(* ------------------------------------------------------------------------- *) +(* Correctness of standard ABI version. *) +(* ------------------------------------------------------------------------- *) + +let BIGNUM_MOD_P256K1_CORRECT = time prove + (`!z k x n pc. + nonoverlapping (word pc,0xec) (z,32) + ==> ensures x86 + (\s. bytes_loaded s (word pc) bignum_mod_p256k1_tmc /\ + read RIP s = word(pc + 0x3) /\ + C_ARGUMENTS [z; k; x] s /\ + bignum_from_memory (x,val k) s = n) + (\s. read RIP s = word (pc + 0xbc) /\ + bignum_from_memory (z,4) s = n MOD p_256k1) + (MAYCHANGE [RIP; RSI; RAX; RDX; RCX; RBX; R8; R9; R10; R11; R12] ,, + MAYCHANGE SOME_FLAGS ,, MAYCHANGE [events] ,, + MAYCHANGE [memory :> bignum(z,4)])`, + tac BIGNUM_MOD_P256K1_EXEC (curry mk_comb `(+) (pc:num)` o mk_small_numeral));; + +let BIGNUM_MOD_P256K1_NOIBT_SUBROUTINE_CORRECT = time prove + (`!z k x n pc stackpointer returnaddress. + nonoverlapping (word_sub stackpointer (word 16),24) (z,32) /\ + ALL (nonoverlapping (word_sub stackpointer (word 16),16)) + [(word pc,LENGTH bignum_mod_p256k1_tmc); (x, 8 * val k)] /\ + nonoverlapping (word pc,LENGTH bignum_mod_p256k1_tmc) (z,32) + ==> ensures x86 + (\s. bytes_loaded s (word pc) bignum_mod_p256k1_tmc /\ + read RIP s = word pc /\ + read RSP s = stackpointer /\ + read (memory :> bytes64 stackpointer) s = returnaddress /\ + C_ARGUMENTS [z; k; x] s /\ + bignum_from_memory (x,val k) s = n) + (\s. read RIP s = returnaddress /\ + read RSP s = word_add stackpointer (word 8) /\ + bignum_from_memory (z,4) s = n MOD p_256k1) + (MAYCHANGE [RSP] ,, MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bignum(z,4); + memory :> bytes(word_sub stackpointer (word 16),16)])`, + X86_ADD_RETURN_STACK_TAC BIGNUM_MOD_P256K1_EXEC BIGNUM_MOD_P256K1_CORRECT + `[RBX; R12]` 16);; + +let BIGNUM_MOD_P256K1_SUBROUTINE_CORRECT = time prove + (`!z k x n pc stackpointer returnaddress. + nonoverlapping (word_sub stackpointer (word 16),24) (z,32) /\ + ALL (nonoverlapping (word_sub stackpointer (word 16),16)) + [(word pc,LENGTH bignum_mod_p256k1_mc); (x, 8 * val k)] /\ + nonoverlapping (word pc,LENGTH bignum_mod_p256k1_mc) (z,32) + ==> ensures x86 + (\s. bytes_loaded s (word pc) bignum_mod_p256k1_mc /\ + read RIP s = word pc /\ + read RSP s = stackpointer /\ + read (memory :> bytes64 stackpointer) s = returnaddress /\ + C_ARGUMENTS [z; k; x] s /\ + bignum_from_memory (x,val k) s = n) + (\s. read RIP s = returnaddress /\ + read RSP s = word_add stackpointer (word 8) /\ + bignum_from_memory (z,4) s = n MOD p_256k1) + (MAYCHANGE [RSP] ,, MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bignum(z,4); + memory :> bytes(word_sub stackpointer (word 16),16)])`, + MATCH_ACCEPT_TAC(ADD_IBT_RULE BIGNUM_MOD_P256K1_NOIBT_SUBROUTINE_CORRECT));; + +(* ------------------------------------------------------------------------- *) +(* Correctness of Windows ABI version. *) +(* ------------------------------------------------------------------------- *) + +let bignum_mod_p256k1_windows_mc = define_from_elf + "bignum_mod_p256k1_windows_mc" "x86/secp256k1/bignum_mod_p256k1.obj";; + +let bignum_mod_p256k1_windows_tmc = define_trimmed "bignum_mod_p256k1_windows_tmc" bignum_mod_p256k1_windows_mc;; + +let BIGNUM_MOD_P256K1_WINDOWS_CORRECT = time prove + (`!z k x n pc. + nonoverlapping (word pc,0xf9) (z,32) + ==> ensures x86 + (\s. bytes_loaded s (word pc) bignum_mod_p256k1_windows_tmc /\ + read RIP s = word(pc + 0xe) /\ + C_ARGUMENTS [z; k; x] s /\ + bignum_from_memory (x,val k) s = n) + (\s. read RIP s = word (pc + 0xc7) /\ + bignum_from_memory (z,4) s = n MOD p_256k1) + (MAYCHANGE [RIP; RSI; RAX; RDX; RCX; RBX; R8; R9; R10; R11; R12] ,, + MAYCHANGE SOME_FLAGS ,, MAYCHANGE [events] ,, + MAYCHANGE [memory :> bignum(z,4)])`, + tac (X86_MK_EXEC_RULE bignum_mod_p256k1_windows_tmc) + (curry mk_comb `(+) (pc:num)` o mk_small_numeral o (fun n -> n+11)));; + +let BIGNUM_MOD_P256K1_NOIBT_WINDOWS_SUBROUTINE_CORRECT = time prove + (`!z k x n pc stackpointer returnaddress. + nonoverlapping (word_sub stackpointer (word 32),40) (z,32) /\ + ALL (nonoverlapping (word_sub stackpointer (word 32),32)) + [(word pc,LENGTH bignum_mod_p256k1_windows_tmc); (x, 8 * val k)] /\ + nonoverlapping (word pc,LENGTH bignum_mod_p256k1_windows_tmc) (z,32) + ==> ensures x86 + (\s. bytes_loaded s (word pc) bignum_mod_p256k1_windows_tmc /\ + read RIP s = word pc /\ + read RSP s = stackpointer /\ + read (memory :> bytes64 stackpointer) s = returnaddress /\ + WINDOWS_C_ARGUMENTS [z; k; x] s /\ + bignum_from_memory (x,val k) s = n) + (\s. read RIP s = returnaddress /\ + read RSP s = word_add stackpointer (word 8) /\ + bignum_from_memory (z,4) s = n MOD p_256k1) + (MAYCHANGE [RSP] ,, WINDOWS_MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bignum(z,4); + memory :> bytes(word_sub stackpointer (word 32),32)])`, + GEN_X86_ADD_RETURN_STACK_TAC (X86_MK_EXEC_RULE bignum_mod_p256k1_windows_tmc) + BIGNUM_MOD_P256K1_WINDOWS_CORRECT + `[RDI; RSI; RBX; R12]` 32 (7,5));; + +let BIGNUM_MOD_P256K1_WINDOWS_SUBROUTINE_CORRECT = time prove + (`!z k x n pc stackpointer returnaddress. + nonoverlapping (word_sub stackpointer (word 32),40) (z,32) /\ + ALL (nonoverlapping (word_sub stackpointer (word 32),32)) + [(word pc,LENGTH bignum_mod_p256k1_windows_mc); (x, 8 * val k)] /\ + nonoverlapping (word pc,LENGTH bignum_mod_p256k1_windows_mc) (z,32) + ==> ensures x86 + (\s. bytes_loaded s (word pc) bignum_mod_p256k1_windows_mc /\ + read RIP s = word pc /\ + read RSP s = stackpointer /\ + read (memory :> bytes64 stackpointer) s = returnaddress /\ + WINDOWS_C_ARGUMENTS [z; k; x] s /\ + bignum_from_memory (x,val k) s = n) + (\s. read RIP s = returnaddress /\ + read RSP s = word_add stackpointer (word 8) /\ + bignum_from_memory (z,4) s = n MOD p_256k1) + (MAYCHANGE [RSP] ,, WINDOWS_MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bignum(z,4); + memory :> bytes(word_sub stackpointer (word 32),32)])`, + MATCH_ACCEPT_TAC(ADD_IBT_RULE BIGNUM_MOD_P256K1_NOIBT_WINDOWS_SUBROUTINE_CORRECT));; diff --git a/x86/proofs/specifications.txt b/x86/proofs/specifications.txt index 53aed50c1..8f4c986df 100644 --- a/x86/proofs/specifications.txt +++ b/x86/proofs/specifications.txt @@ -538,6 +538,10 @@ BIGNUM_MOD_N256K1_4_NOIBT_SUBROUTINE_CORRECT BIGNUM_MOD_N256K1_4_NOIBT_WINDOWS_SUBROUTINE_CORRECT BIGNUM_MOD_N256K1_4_SUBROUTINE_CORRECT BIGNUM_MOD_N256K1_4_WINDOWS_SUBROUTINE_CORRECT +BIGNUM_MOD_N256K1_NOIBT_SUBROUTINE_CORRECT +BIGNUM_MOD_N256K1_NOIBT_WINDOWS_SUBROUTINE_CORRECT +BIGNUM_MOD_N256K1_SUBROUTINE_CORRECT +BIGNUM_MOD_N256K1_WINDOWS_SUBROUTINE_CORRECT BIGNUM_MOD_N256_4_NOIBT_SUBROUTINE_CORRECT BIGNUM_MOD_N256_4_NOIBT_WINDOWS_SUBROUTINE_CORRECT BIGNUM_MOD_N256_4_SUBROUTINE_CORRECT @@ -598,6 +602,10 @@ BIGNUM_MOD_P256K1_4_NOIBT_SUBROUTINE_CORRECT BIGNUM_MOD_P256K1_4_NOIBT_WINDOWS_SUBROUTINE_CORRECT BIGNUM_MOD_P256K1_4_SUBROUTINE_CORRECT BIGNUM_MOD_P256K1_4_WINDOWS_SUBROUTINE_CORRECT +BIGNUM_MOD_P256K1_NOIBT_SUBROUTINE_CORRECT +BIGNUM_MOD_P256K1_NOIBT_WINDOWS_SUBROUTINE_CORRECT +BIGNUM_MOD_P256K1_SUBROUTINE_CORRECT +BIGNUM_MOD_P256K1_WINDOWS_SUBROUTINE_CORRECT BIGNUM_MOD_P256_4_NOIBT_SUBROUTINE_CORRECT BIGNUM_MOD_P256_4_NOIBT_WINDOWS_SUBROUTINE_CORRECT BIGNUM_MOD_P256_4_SUBROUTINE_CORRECT From 45e94a4cfb45997a61640f6e68d5c0b9892a3ffa Mon Sep 17 00:00:00 2001 From: John Harrison Date: Wed, 25 Feb 2026 11:49:38 -0800 Subject: [PATCH 4/6] Add new secp256k1 modulus operations to non_ct_functions.txt --- non_ct_functions.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/non_ct_functions.txt b/non_ct_functions.txt index 5b9fe753c..9ef51d942 100644 --- a/non_ct_functions.txt +++ b/non_ct_functions.txt @@ -6,3 +6,7 @@ p384/bignum_mod_n384o: p384/bignum_mod_n384_alto: p384/bignum_mod_p384o: p384/bignum_mod_p384_alto: +secp256k1/bignum_mod_n256k1o: +secp256k1/bignum_mod_n256k1_alto: +secp256k1/bignum_mod_p256k1o: +secp256k1/bignum_mod_p256k1_alto: From 5cfcffe832011873268aae868c7885cf025a1ee7 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 26 Feb 2026 00:43:21 -0800 Subject: [PATCH 5/6] Partially fix up proofs --- arm/proofs/bignum_mod_n256k1.ml | 18 ++++++------------ arm/proofs/bignum_mod_p256k1.ml | 18 ++++++------------ x86/proofs/bignum_mod_n256k1.ml | 8 +++++--- x86/proofs/bignum_mod_p256k1.ml | 8 +++++--- 4 files changed, 22 insertions(+), 30 deletions(-) diff --git a/arm/proofs/bignum_mod_n256k1.ml b/arm/proofs/bignum_mod_n256k1.ml index 0c47b10dc..ee908a1b4 100644 --- a/arm/proofs/bignum_mod_n256k1.ml +++ b/arm/proofs/bignum_mod_n256k1.ml @@ -308,18 +308,12 @@ let BIGNUM_MOD_N256K1_CORRECT = time prove REWRITE_TAC[REAL_OF_NUM_CLAUSES] THEN BOOL_CASES_TAC `carry_s1:bool` THEN ASM_REWRITE_TAC[BITVAL_CLAUSES; ADD_CLAUSES; MULT_CLAUSES] THEN - DISCH_TAC THENL - [SUBGOAL_THEN - `MIN (val(m4:int64) + 1) (2 EXP 64 - 1) = 2 EXP 64 - 1` - SUBST1_TAC THENL - [MP_TAC(SPEC `sum_s1:int64` VAL_BOUND_64) THEN ASM_ARITH_TAC; - CONV_TAC NUM_REDUCE_CONV THEN CONV_TAC WORD_REDUCE_CONV]; - SUBGOAL_THEN - `MIN (val(m4:int64) + 1) (2 EXP 64 - 1) = val m4 + 1` - SUBST1_TAC THENL - [MP_TAC(SPEC `sum_s1:int64` VAL_BOUND_64) THEN ASM_ARITH_TAC; - GEN_REWRITE_TAC LAND_CONV [GSYM WORD_VAL] THEN - AP_TERM_TAC THEN ASM_ARITH_TAC]]; + DISCH_THEN(SUBST1_TAC o SYM) THENL + [REWRITE_TAC[ARITH_RULE + `MIN (2 EXP 64 + a) (2 EXP 64 - 1) = 2 EXP 64 - 1`] THEN + CONV_TAC(DEPTH_CONV WORD_NUM_RED_CONV); + SIMP_TAC[VAL_BOUND_64; WORD_VAL; ARITH_RULE + `a < 2 EXP 64 ==> MIN a (2 EXP 64 - 1) = a`]]; DISCH_TAC THEN VAL_INT64_TAC `q:num`] THEN (*** The next digit in the current state ***) diff --git a/arm/proofs/bignum_mod_p256k1.ml b/arm/proofs/bignum_mod_p256k1.ml index 4d17d01ed..7660881ae 100644 --- a/arm/proofs/bignum_mod_p256k1.ml +++ b/arm/proofs/bignum_mod_p256k1.ml @@ -291,18 +291,12 @@ let BIGNUM_MOD_P256K1_CORRECT = time prove REWRITE_TAC[REAL_OF_NUM_CLAUSES] THEN BOOL_CASES_TAC `carry_s1:bool` THEN ASM_REWRITE_TAC[BITVAL_CLAUSES; ADD_CLAUSES; MULT_CLAUSES] THEN - DISCH_TAC THENL - [SUBGOAL_THEN - `MIN (val(m4:int64) + 1) (2 EXP 64 - 1) = 2 EXP 64 - 1` - SUBST1_TAC THENL - [MP_TAC(SPEC `sum_s1:int64` VAL_BOUND_64) THEN ASM_ARITH_TAC; - CONV_TAC NUM_REDUCE_CONV THEN CONV_TAC WORD_REDUCE_CONV]; - SUBGOAL_THEN - `MIN (val(m4:int64) + 1) (2 EXP 64 - 1) = val m4 + 1` - SUBST1_TAC THENL - [MP_TAC(SPEC `sum_s1:int64` VAL_BOUND_64) THEN ASM_ARITH_TAC; - GEN_REWRITE_TAC LAND_CONV [GSYM WORD_VAL] THEN - AP_TERM_TAC THEN ASM_ARITH_TAC]]; + DISCH_THEN(SUBST1_TAC o SYM) THENL + [REWRITE_TAC[ARITH_RULE + `MIN (2 EXP 64 + a) (2 EXP 64 - 1) = 2 EXP 64 - 1`] THEN + CONV_TAC(DEPTH_CONV WORD_NUM_RED_CONV); + SIMP_TAC[VAL_BOUND_64; WORD_VAL; ARITH_RULE + `a < 2 EXP 64 ==> MIN a (2 EXP 64 - 1) = a`]]; DISCH_TAC THEN VAL_INT64_TAC `q:num`] THEN (*** The next digit in the current state ***) diff --git a/x86/proofs/bignum_mod_n256k1.ml b/x86/proofs/bignum_mod_n256k1.ml index 00a2ad578..0f3dcd111 100644 --- a/x86/proofs/bignum_mod_n256k1.ml +++ b/x86/proofs/bignum_mod_n256k1.ml @@ -329,15 +329,17 @@ let tac execth offset = ASM_REWRITE_TAC[bignum_of_wordlist; ADD_CLAUSES; MULT_CLAUSES] THEN REWRITE_TAC[WORD_OR_MASK] THEN ACCUMULATOR_POP_ASSUM_LIST(MP_TAC o end_itlist CONJ) THEN - REWRITE_TAC[REAL_OF_NUM_CLAUSES] THEN DISCH_TAC THEN - COND_CASES_TAC THEN ASM_REWRITE_TAC[BITVAL_CLAUSES] THENL + REWRITE_TAC[REAL_OF_NUM_CLAUSES] THEN + COND_CASES_TAC THEN ASM_REWRITE_TAC[BITVAL_CLAUSES] THEN + REWRITE_TAC[MULT_CLAUSES; ADD_CLAUSES] THEN + DISCH_THEN(SUBST1_TAC o SYM) THENL [REWRITE_TAC[ARITH_RULE `MIN (2 EXP 64 + a) (2 EXP 64 - 1) = 2 EXP 64 - 1`] THEN CONV_TAC NUM_REDUCE_CONV THEN CONV_TAC WORD_REDUCE_CONV; SIMP_TAC[VAL_BOUND_64; ARITH_RULE `n < 2 EXP 64 ==> MIN n (2 EXP 64 - 1) = n`] THEN GEN_REWRITE_TAC LAND_CONV [GSYM WORD_VAL] THEN - AP_TERM_TAC THEN ASM_ARITH_TAC]; + CONV_TAC(DEPTH_CONV WORD_NUM_RED_CONV)]; DISCH_TAC THEN VAL_INT64_TAC `q:num`] THEN (*** The next digit in the current state ***) diff --git a/x86/proofs/bignum_mod_p256k1.ml b/x86/proofs/bignum_mod_p256k1.ml index a7bbfb0ab..6fd5d8e18 100644 --- a/x86/proofs/bignum_mod_p256k1.ml +++ b/x86/proofs/bignum_mod_p256k1.ml @@ -302,15 +302,17 @@ let tac execth offset = ASM_REWRITE_TAC[bignum_of_wordlist; ADD_CLAUSES; MULT_CLAUSES] THEN REWRITE_TAC[WORD_OR_MASK] THEN ACCUMULATOR_POP_ASSUM_LIST(MP_TAC o end_itlist CONJ) THEN - REWRITE_TAC[REAL_OF_NUM_CLAUSES] THEN DISCH_TAC THEN - COND_CASES_TAC THEN ASM_REWRITE_TAC[BITVAL_CLAUSES] THENL + REWRITE_TAC[REAL_OF_NUM_CLAUSES] THEN + COND_CASES_TAC THEN ASM_REWRITE_TAC[BITVAL_CLAUSES] THEN + REWRITE_TAC[MULT_CLAUSES; ADD_CLAUSES] THEN + DISCH_THEN(SUBST1_TAC o SYM) THENL [REWRITE_TAC[ARITH_RULE `MIN (2 EXP 64 + a) (2 EXP 64 - 1) = 2 EXP 64 - 1`] THEN CONV_TAC NUM_REDUCE_CONV THEN CONV_TAC WORD_REDUCE_CONV; SIMP_TAC[VAL_BOUND_64; ARITH_RULE `n < 2 EXP 64 ==> MIN n (2 EXP 64 - 1) = n`] THEN GEN_REWRITE_TAC LAND_CONV [GSYM WORD_VAL] THEN - AP_TERM_TAC THEN ASM_ARITH_TAC]; + CONV_TAC(DEPTH_CONV WORD_NUM_RED_CONV)]; DISCH_TAC THEN VAL_INT64_TAC `q:num`] THEN (*** The next digit in the current state ***) From 631ec2677904ba46c79f5b0c16a9161f1b8a9f9b Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 26 Feb 2026 22:35:56 -0800 Subject: [PATCH 6/6] Fix x86 proofs Claude eventually converged on something correct, I think --- x86/proofs/bignum_mod_n256k1.ml | 18 ++++++++++-------- x86/proofs/bignum_mod_p256k1.ml | 10 ++++------ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/x86/proofs/bignum_mod_n256k1.ml b/x86/proofs/bignum_mod_n256k1.ml index 0f3dcd111..5d2423ed6 100644 --- a/x86/proofs/bignum_mod_n256k1.ml +++ b/x86/proofs/bignum_mod_n256k1.ml @@ -321,7 +321,7 @@ let tac execth offset = X86_ACCSTEPS_TAC execth [2] (1--2) THEN X86_STEPS_TAC execth (3--5) THEN FIRST_X_ASSUM(MP_TAC o SPEC `word q:int64` o MATCH_MP (MESON[] - `!q. read RAX s = q' ==> q' = q ==> read RAX s = q`)) THEN + `!q. read RBX s = q' ==> q' = q ==> read RBX s = q`)) THEN ANTS_TAC THENL [EXPAND_TAC "q" THEN EXPAND_TAC "m" THEN @@ -336,10 +336,8 @@ let tac execth offset = [REWRITE_TAC[ARITH_RULE `MIN (2 EXP 64 + a) (2 EXP 64 - 1) = 2 EXP 64 - 1`] THEN CONV_TAC NUM_REDUCE_CONV THEN CONV_TAC WORD_REDUCE_CONV; - SIMP_TAC[VAL_BOUND_64; ARITH_RULE - `n < 2 EXP 64 ==> MIN n (2 EXP 64 - 1) = n`] THEN - GEN_REWRITE_TAC LAND_CONV [GSYM WORD_VAL] THEN - CONV_TAC(DEPTH_CONV WORD_NUM_RED_CONV)]; + SIMP_TAC[VAL_BOUND_64; WORD_VAL; ARITH_RULE + `n < 2 EXP 64 ==> MIN n (2 EXP 64 - 1) = n`]]; DISCH_TAC THEN VAL_INT64_TAC `q:num`] THEN (*** The next digit in the current state ***) @@ -357,6 +355,7 @@ let tac execth offset = ABBREV_TAC `w:int64 = word q` THEN UNDISCH_THEN `val(w:int64) = q` (SUBST_ALL_TAC o SYM) THEN + RULE_ASSUM_TAC(REWRITE_RULE[WORD_VAL]) THEN ACCUMULATOR_POP_ASSUM_LIST(K ALL_TAC o end_itlist CONJ) THEN (*** Subtraction of q * n_256k1 ***) @@ -378,13 +377,16 @@ let tac execth offset = CONJ_TAC THENL [BOUNDER_TAC[]; CONV_TAC(ONCE_DEPTH_CONV NUM_ADD_CONV)] THEN ASM_REWRITE_TAC[] THEN EXPAND_TAC "m" THEN REWRITE_TAC[bignum_of_wordlist; GSYM REAL_OF_NUM_CLAUSES; n_256k1] THEN - ACCUMULATOR_ASSUM_LIST(MP_TAC o end_itlist CONJ o DESUM_RULE) THEN + ACCUMULATOR_ASSUM_LIST(MP_TAC o end_itlist CONJ o + DESUM_RULE o map(REWRITE_RULE[VAL_WORD_BITVAL; ADD_CLAUSES])) THEN DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN CONV_TAC(RAND_CONV REAL_POLY_CONV) THEN REPEAT(MATCH_MP_TAC INTEGER_ADD THEN CONJ_TAC) THEN TRY REAL_INTEGER_TAC THEN - ACCUMULATOR_POP_ASSUM_LIST(MP_TAC o end_itlist CONJ o DECARRY_RULE) THEN - DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN REAL_INTEGER_TAC; + ACCUMULATOR_POP_ASSUM_LIST(MP_TAC o end_itlist CONJ o + DECARRY_RULE o map(REWRITE_RULE[VAL_WORD_BITVAL; ADD_CLAUSES])) THEN + DISCH_THEN(fun th -> REWRITE_TAC[th]) THEN + REWRITE_TAC[REAL_MUL_RZERO; REAL_MUL_LZERO] THEN REAL_INTEGER_TAC; ACCUMULATOR_POP_ASSUM_LIST(K ALL_TAC)] THEN (*** Final correction ***) diff --git a/x86/proofs/bignum_mod_p256k1.ml b/x86/proofs/bignum_mod_p256k1.ml index 6fd5d8e18..a62593177 100644 --- a/x86/proofs/bignum_mod_p256k1.ml +++ b/x86/proofs/bignum_mod_p256k1.ml @@ -309,10 +309,8 @@ let tac execth offset = [REWRITE_TAC[ARITH_RULE `MIN (2 EXP 64 + a) (2 EXP 64 - 1) = 2 EXP 64 - 1`] THEN CONV_TAC NUM_REDUCE_CONV THEN CONV_TAC WORD_REDUCE_CONV; - SIMP_TAC[VAL_BOUND_64; ARITH_RULE - `n < 2 EXP 64 ==> MIN n (2 EXP 64 - 1) = n`] THEN - GEN_REWRITE_TAC LAND_CONV [GSYM WORD_VAL] THEN - CONV_TAC(DEPTH_CONV WORD_NUM_RED_CONV)]; + SIMP_TAC[VAL_BOUND_64; WORD_VAL; ARITH_RULE + `n < 2 EXP 64 ==> MIN n (2 EXP 64 - 1) = n`]]; DISCH_TAC THEN VAL_INT64_TAC `q:num`] THEN (*** The next digit in the current state ***) @@ -394,7 +392,7 @@ let tac execth offset = let BIGNUM_MOD_P256K1_CORRECT = time prove (`!z k x n pc. - nonoverlapping (word pc,0xec) (z,32) + nonoverlapping (word pc,0xe8) (z,32) ==> ensures x86 (\s. bytes_loaded s (word pc) bignum_mod_p256k1_tmc /\ read RIP s = word(pc + 0x3) /\ @@ -461,7 +459,7 @@ let bignum_mod_p256k1_windows_tmc = define_trimmed "bignum_mod_p256k1_windows_tm let BIGNUM_MOD_P256K1_WINDOWS_CORRECT = time prove (`!z k x n pc. - nonoverlapping (word pc,0xf9) (z,32) + nonoverlapping (word pc,0xf5) (z,32) ==> ensures x86 (\s. bytes_loaded s (word pc) bignum_mod_p256k1_windows_tmc /\ read RIP s = word(pc + 0xe) /\